mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-15 05:02:07 +00:00
computer use tool
This commit is contained in:
parent
3919bbf708
commit
e1bba1c1a5
9 changed files with 911 additions and 17 deletions
|
|
@ -325,6 +325,7 @@ ${chalk.bold("Environment Variables:")}
|
||||||
AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1)
|
AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1)
|
||||||
${ENV_AGENT_DIR.padEnd(32)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent)
|
${ENV_AGENT_DIR.padEnd(32)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent)
|
||||||
COMPANION_PACKAGE_DIR - Override package directory (for Nix/Guix store paths)
|
COMPANION_PACKAGE_DIR - Override package directory (for Nix/Guix store paths)
|
||||||
|
COMPANION_AGENT_COMPUTER_COMMAND - Override the computer helper command (default: agent-computer)
|
||||||
COMPANION_OFFLINE - Disable startup network operations when set to 1/true/yes
|
COMPANION_OFFLINE - Disable startup network operations when set to 1/true/yes
|
||||||
COMPANION_SHARE_VIEWER_URL - Base URL for /share command (default: https://companion.dev/session/)
|
COMPANION_SHARE_VIEWER_URL - Base URL for /share command (default: https://companion.dev/session/)
|
||||||
COMPANION_AI_ANTIGRAVITY_VERSION - Override Antigravity User-Agent version (e.g., 1.23.0)
|
COMPANION_AI_ANTIGRAVITY_VERSION - Override Antigravity User-Agent version (e.g., 1.23.0)
|
||||||
|
|
@ -333,6 +334,7 @@ ${chalk.bold(`Available Tools (default: ${defaultToolsText}):`)}
|
||||||
read - Read file contents
|
read - Read file contents
|
||||||
bash - Execute bash commands
|
bash - Execute bash commands
|
||||||
browser - Browser automation with persistent state
|
browser - Browser automation with persistent state
|
||||||
|
computer - Desktop computer automation with screen observation and native UI control
|
||||||
edit - Edit files with find/replace
|
edit - Edit files with find/replace
|
||||||
write - Write files (creates/overwrites)
|
write - Write files (creates/overwrites)
|
||||||
grep - Search file contents (read-only, off by default)
|
grep - Search file contents (read-only, off by default)
|
||||||
|
|
|
||||||
|
|
@ -26,10 +26,12 @@ import {
|
||||||
allTools,
|
allTools,
|
||||||
bashTool,
|
bashTool,
|
||||||
browserTool,
|
browserTool,
|
||||||
|
computerTool,
|
||||||
codingTools,
|
codingTools,
|
||||||
defaultCodingToolNames,
|
defaultCodingToolNames,
|
||||||
createBashTool,
|
createBashTool,
|
||||||
createBrowserTool,
|
createBrowserTool,
|
||||||
|
createComputerTool,
|
||||||
createCodingTools,
|
createCodingTools,
|
||||||
createEditTool,
|
createEditTool,
|
||||||
createFindTool,
|
createFindTool,
|
||||||
|
|
@ -67,7 +69,7 @@ export interface CreateAgentSessionOptions {
|
||||||
/** Models available for cycling (Ctrl+P in interactive mode) */
|
/** Models available for cycling (Ctrl+P in interactive mode) */
|
||||||
scopedModels?: Array<{ model: Model<any>; thinkingLevel?: ThinkingLevel }>;
|
scopedModels?: Array<{ model: Model<any>; thinkingLevel?: ThinkingLevel }>;
|
||||||
|
|
||||||
/** Built-in tools to use. Default: codingTools [read, bash, browser, edit, write] */
|
/** Built-in tools to use. Default: codingTools [read, bash, browser, computer, edit, write] */
|
||||||
tools?: Tool[];
|
tools?: Tool[];
|
||||||
/** Custom tools to register (in addition to built-in tools). */
|
/** Custom tools to register (in addition to built-in tools). */
|
||||||
customTools?: ToolDefinition[];
|
customTools?: ToolDefinition[];
|
||||||
|
|
@ -113,6 +115,7 @@ export {
|
||||||
readTool,
|
readTool,
|
||||||
bashTool,
|
bashTool,
|
||||||
browserTool,
|
browserTool,
|
||||||
|
computerTool,
|
||||||
editTool,
|
editTool,
|
||||||
writeTool,
|
writeTool,
|
||||||
grepTool,
|
grepTool,
|
||||||
|
|
@ -127,6 +130,7 @@ export {
|
||||||
createReadTool,
|
createReadTool,
|
||||||
createBashTool,
|
createBashTool,
|
||||||
createBrowserTool,
|
createBrowserTool,
|
||||||
|
createComputerTool,
|
||||||
createEditTool,
|
createEditTool,
|
||||||
createWriteTool,
|
createWriteTool,
|
||||||
createGrepTool,
|
createGrepTool,
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ const toolDescriptions: Record<string, string> = {
|
||||||
bash: "Run shell commands",
|
bash: "Run shell commands",
|
||||||
browser:
|
browser:
|
||||||
"Browse the web: open, snapshot, click, fill, wait, screenshot, save/load state",
|
"Browse the web: open, snapshot, click, fill, wait, screenshot, save/load state",
|
||||||
|
computer:
|
||||||
|
"Use the desktop computer: observe the screen, click, type, send hotkeys, manage apps/windows, wait for native UI, and read/write the clipboard",
|
||||||
edit: "Surgical file edits (find exact text, replace it)",
|
edit: "Surgical file edits (find exact text, replace it)",
|
||||||
write: "Create new files or completely rewrite existing ones",
|
write: "Create new files or completely rewrite existing ones",
|
||||||
grep: "Search file contents by regex (respects .gitignore)",
|
grep: "Search file contents by regex (respects .gitignore)",
|
||||||
|
|
@ -167,6 +169,7 @@ export function buildSystemPrompt(
|
||||||
|
|
||||||
const hasBash = tools.includes("bash");
|
const hasBash = tools.includes("bash");
|
||||||
const hasBrowser = tools.includes("browser");
|
const hasBrowser = tools.includes("browser");
|
||||||
|
const hasComputer = tools.includes("computer");
|
||||||
const hasEdit = tools.includes("edit");
|
const hasEdit = tools.includes("edit");
|
||||||
const hasWrite = tools.includes("write");
|
const hasWrite = tools.includes("write");
|
||||||
const hasGrep = tools.includes("grep");
|
const hasGrep = tools.includes("grep");
|
||||||
|
|
@ -215,6 +218,16 @@ export function buildSystemPrompt(
|
||||||
"Browser: snapshot before interacting with elements. Use it for research and learning too, not just automation",
|
"Browser: snapshot before interacting with elements. Use it for research and learning too, not just automation",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if (hasComputer) {
|
||||||
|
addGuideline(
|
||||||
|
"Computer: observe before interacting. Use it for native UI, desktop apps, file pickers, downloads, and OS dialogs",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (hasBrowser && hasComputer) {
|
||||||
|
addGuideline(
|
||||||
|
"Prefer browser for websites and DOM-aware tasks. Switch to computer when native UI or desktop state matters",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Output hygiene
|
// Output hygiene
|
||||||
if (hasEdit || hasWrite) {
|
if (hasEdit || hasWrite) {
|
||||||
|
|
|
||||||
666
packages/coding-agent/src/core/tools/computer.ts
Normal file
666
packages/coding-agent/src/core/tools/computer.ts
Normal file
|
|
@ -0,0 +1,666 @@
|
||||||
|
import { spawn } from "node:child_process";
|
||||||
|
import { mkdirSync } from "node:fs";
|
||||||
|
import { join, resolve } from "node:path";
|
||||||
|
import type { AgentTool } from "@mariozechner/companion-agent-core";
|
||||||
|
import { type Static, Type } from "@sinclair/typebox";
|
||||||
|
import { getAgentDir } from "../../config.js";
|
||||||
|
import {
|
||||||
|
getShellEnv,
|
||||||
|
killProcessTree,
|
||||||
|
sanitizeBinaryOutput,
|
||||||
|
} from "../../utils/shell.js";
|
||||||
|
|
||||||
|
/**
 * Every action name the computer tool accepts.
 *
 * Declared `as const` so the literal union of action names can be derived
 * from this list instead of being maintained separately.
 */
const computerActions = [
  // Screen observation and pointer/keyboard input
  "observe",
  "click",
  "type",
  "hotkey",
  "scroll",
  "drag",
  "wait",
  // Applications
  "app_list",
  "app_open",
  "app_focus",
  // Windows
  "window_list",
  "window_focus",
  "window_move",
  "window_resize",
  "window_close",
  // Clipboard
  "clipboard_read",
  "clipboard_write",
] as const;
|
||||||
|
|
||||||
|
/** Observation strategies accepted by the `mode` parameter of the tool schema. */
const computerObservationModes = ["hybrid", "ocr", "accessibility"] as const;

/**
 * Helper executable used when the caller does not pass an explicit `command`.
 * Read once at module load; an unset or empty
 * COMPANION_AGENT_COMPUTER_COMMAND falls back to "agent-computer".
 */
const DEFAULT_COMPUTER_COMMAND =
  process.env.COMPANION_AGENT_COMPUTER_COMMAND || "agent-computer";

/** Default time limit, in seconds, for a single helper invocation. */
const DEFAULT_COMPUTER_TIMEOUT_SECONDS = 90;
|
||||||
|
|
||||||
|
/**
 * Parameter schema for the computer tool.
 *
 * Only `action` is required by the schema itself. Which of the optional
 * fields are actually needed depends on the action and is checked at call
 * time by validateComputerInput.
 */
const computerSchema = Type.Object({
  action: Type.Union(
    computerActions.map((name) => Type.Literal(name)),
    { description: "Computer action to execute" },
  ),

  // --- Targets resolved from a previous observe call ---
  snapshotId: Type.Optional(
    Type.String({ description: "Snapshot ID returned from observe" }),
  ),
  ref: Type.Optional(
    Type.String({
      description:
        "Target ref from observe output, such as w1 for a window or t3 for OCR text",
    }),
  ),

  // --- Explicit coordinates ---
  x: Type.Optional(Type.Number({ description: "Target x coordinate" })),
  y: Type.Optional(Type.Number({ description: "Target y coordinate" })),

  // --- Drag destination ---
  toRef: Type.Optional(
    Type.String({ description: "Destination ref for drag actions" }),
  ),
  toX: Type.Optional(
    Type.Number({ description: "Destination x coordinate for drag actions" }),
  ),
  toY: Type.Optional(
    Type.Number({ description: "Destination y coordinate for drag actions" }),
  ),

  // --- Keyboard and text ---
  text: Type.Optional(
    Type.String({
      description:
        "Text to type, text to wait for, or clipboard contents depending on action",
    }),
  ),
  keys: Type.Optional(
    Type.Array(Type.String(), {
      description: "Hotkey chord or key sequence, for example ['ctrl', 'l']",
      minItems: 1,
    }),
  ),

  // --- Applications and windows ---
  app: Type.Optional(
    Type.String({
      description:
        "Installed app or running app name/class for app_open, app_focus, and wait",
    }),
  ),
  windowId: Type.Optional(
    Type.String({ description: "Window ID, such as 0x04200007" }),
  ),
  windowTitle: Type.Optional(
    Type.String({ description: "Window title substring to match" }),
  ),

  // --- Action-specific tuning ---
  mode: Type.Optional(
    Type.Union(
      computerObservationModes.map((name) => Type.Literal(name)),
      { description: "Observation mode. Defaults to hybrid." },
    ),
  ),
  amount: Type.Optional(
    Type.Number({
      description:
        "Scroll amount in wheel steps. Positive scrolls down/right, negative scrolls up/left.",
    }),
  ),
  width: Type.Optional(
    Type.Number({ description: "Target window width for resize actions" }),
  ),
  height: Type.Optional(
    Type.Number({ description: "Target window height for resize actions" }),
  ),
  clear: Type.Optional(
    Type.Boolean({
      description: "Clear the active input field before typing",
    }),
  ),
  button: Type.Optional(
    Type.Number({
      description: "Mouse button for click or drag. Defaults to 1.",
      minimum: 1,
      maximum: 7,
    }),
  ),

  // --- Waiting ---
  timeoutMs: Type.Optional(
    Type.Number({
      description: "Wait timeout in milliseconds for observe-derived waits",
      minimum: 0,
    }),
  ),
  intervalMs: Type.Optional(
    Type.Number({
      description: "Polling interval for wait actions in milliseconds",
      minimum: 10,
    }),
  ),
});
|
||||||
|
|
||||||
|
/** Union of the action names listed in `computerActions`. */
export type ComputerToolAction = (typeof computerActions)[number];

/** Union of the observation mode names listed in `computerObservationModes`. */
export type ComputerObservationMode = (typeof computerObservationModes)[number];

/** Static TypeScript shape of a tool call, derived from `computerSchema`. */
export type ComputerToolInput = Static<typeof computerSchema>;
|
||||||
|
|
||||||
|
/**
 * Structured details attached to progress updates and to the final result
 * of a computer tool call.
 */
export interface ComputerToolDetails {
  /** Action that was requested. */
  action: ComputerToolAction;
  /** Helper executable that was invoked. */
  command: string;
  /** Arguments passed to the helper executable. */
  args: string[];
  /** Directory handed to the helper via `--state-dir`. */
  stateDir: string;
  /** Snapshot ID found in the helper's JSON output, when present. */
  snapshotId?: string;
  /** Screenshot path found in the helper's JSON output, when present. */
  screenshotPath?: string;
}
|
||||||
|
|
||||||
|
/**
 * Process-execution backend for the computer tool. Callers can supply their
 * own implementation through `ComputerToolOptions.operations`.
 */
export interface ComputerOperations {
  /**
   * Run the helper command and report its exit code.
   *
   * The built-in implementation forwards both stdout and stderr chunks to
   * `onData`, interprets `timeout` in seconds, and rejects with an error
   * whose message is `"aborted"` or `"timeout:<seconds>"`. The tool's error
   * handling matches on those messages.
   */
  exec: (
    command: string,
    args: string[],
    options: {
      cwd: string;
      env: NodeJS.ProcessEnv;
      onData: (data: Buffer) => void;
      signal?: AbortSignal;
      timeout?: number;
    },
  ) => Promise<{ exitCode: number | null }>;
}
|
||||||
|
|
||||||
|
/**
 * Default backend: spawns the helper as a detached child process, streams
 * its output, and enforces the timeout and abort signal by killing the
 * process tree.
 */
const defaultComputerOperations: ComputerOperations = {
  exec: (command, args, { cwd, env, onData, signal, timeout }) =>
    new Promise((resolvePromise, rejectPromise) => {
      const child = spawn(command, args, {
        cwd,
        detached: true,
        env,
        stdio: ["ignore", "pipe", "pipe"],
      });

      let timedOut = false;
      let timeoutHandle: NodeJS.Timeout | undefined;

      // No pid means the spawn itself failed; the "error" event reports that.
      const killChild = (): void => {
        if (child.pid) {
          killProcessTree(child.pid);
        }
      };

      // `timeout` is in seconds; zero or undefined disables the limit.
      if (timeout !== undefined && timeout > 0) {
        timeoutHandle = setTimeout(() => {
          timedOut = true;
          killChild();
        }, timeout * 1000);
      }

      // stdout and stderr are funnelled into the same callback.
      child.stdout?.on("data", onData);
      child.stderr?.on("data", onData);

      const onAbort = (): void => {
        killChild();
      };

      if (signal) {
        if (signal.aborted) {
          onAbort();
        } else {
          signal.addEventListener("abort", onAbort, { once: true });
        }
      }

      // Shared teardown for both terminal events.
      const releaseResources = (): void => {
        if (timeoutHandle) clearTimeout(timeoutHandle);
        signal?.removeEventListener("abort", onAbort);
      };

      child.on("error", (error) => {
        releaseResources();
        rejectPromise(error);
      });

      child.on("close", (code) => {
        releaseResources();

        // Abort takes precedence over timeout, which takes precedence over
        // the plain exit code.
        if (signal?.aborted) {
          rejectPromise(new Error("aborted"));
        } else if (timedOut) {
          rejectPromise(new Error(`timeout:${timeout}`));
        } else {
          resolvePromise({ exitCode: code });
        }
      });
    }),
};
|
||||||
|
|
||||||
|
/** Optional overrides accepted by `createComputerTool`. */
export interface ComputerToolOptions {
  /** Process-execution backend; the built-in spawn-based one is used when omitted. */
  operations?: ComputerOperations;
  /** Helper executable to run instead of the module default. */
  command?: string;
  /** Time limit per helper invocation, in seconds. */
  defaultTimeoutSeconds?: number;
  /** State directory for the helper; resolved against the tool's cwd. */
  stateDir?: string;
  /** Agent directory under which the default `computer` state dir is placed. */
  agentDir?: string;
}
|
||||||
|
|
||||||
|
/** Everything needed to run one validated tool call and report on it. */
interface ComputerCommandContext {
  /** Action being executed. */
  action: ComputerToolAction;
  /** Command-line arguments for the helper executable. */
  args: string[];
  /** Progress text emitted before the helper starts. */
  statusMessage: string;
  /** Fallback result text used when the helper prints nothing. */
  successMessage: string;
  /** Absolute state directory passed to the helper. */
  stateDir: string;
}
|
||||||
|
|
||||||
|
/**
 * Turn `inputPath` into an absolute path, treating relative paths as
 * relative to `cwd`. Absolute inputs are returned normalized.
 */
function resolveCommandPath(cwd: string, inputPath: string): string {
  const absolutePath = resolve(cwd, inputPath);
  return absolutePath;
}
|
||||||
|
|
||||||
|
/**
 * Default location for computer-tool state: the `computer` folder inside
 * the agent directory (`options.agentDir`, or the configured agent dir).
 */
function getComputerRootDir(options?: ComputerToolOptions): string {
  return join(options?.agentDir ?? getAgentDir(), "computer");
}
|
||||||
|
|
||||||
|
/**
 * Absolute state directory for the helper: the explicit `options.stateDir`
 * when given, otherwise the default root, resolved against `cwd`.
 */
function getComputerStateDir(
  cwd: string,
  options?: ComputerToolOptions,
): string {
  const configuredDir = options?.stateDir ?? getComputerRootDir(options);
  return resolveCommandPath(cwd, configuredDir);
}
|
||||||
|
|
||||||
|
/** Create `stateDir` and any missing parents; a no-op when it already exists. */
function ensureComputerDir(stateDir: string): void {
  const creationOptions = { recursive: true } as const;
  mkdirSync(stateDir, creationOptions);
}
|
||||||
|
|
||||||
|
/**
 * Join the collected output chunks, decode them as UTF-8, pass the text
 * through sanitizeBinaryOutput, and trim surrounding whitespace.
 */
function normalizeOutput(chunks: Buffer[]): string {
  const decoded = Buffer.concat(chunks).toString("utf-8");
  const sanitized = sanitizeBinaryOutput(decoded);
  return sanitized.trim();
}
|
||||||
|
|
||||||
|
/** True when both `x` and `y` are supplied. */
function hasCoordinateTarget(input: ComputerToolInput): boolean {
  const { x, y } = input;
  return x !== undefined && y !== undefined;
}
|
||||||
|
|
||||||
|
/** True when a ref is supplied together with the snapshot it belongs to. */
function hasRefTarget(input: ComputerToolInput): boolean {
  const { snapshotId, ref } = input;
  return !(snapshotId === undefined || ref === undefined);
}
|
||||||
|
|
||||||
|
/** True when the window is identified by ID or by title. */
function hasWindowTarget(input: ComputerToolInput): boolean {
  const { windowId, windowTitle } = input;
  return !(windowId === undefined && windowTitle === undefined);
}
|
||||||
|
|
||||||
|
/** True when a drag destination is given as `toRef` or as both `toX` and `toY`. */
function hasDragDestination(input: ComputerToolInput): boolean {
  if (input.toRef !== undefined) {
    return true;
  }
  return input.toX !== undefined && input.toY !== undefined;
}
|
||||||
|
|
||||||
|
/**
 * Check the target selection of a `wait` call.
 *
 * A wait may name at most one of ref, text, app, windowId, or windowTitle.
 * With no target at all, `timeoutMs` must be present.
 *
 * @throws Error when no target and no timeout is given, or when more than
 *   one target is given.
 */
function validateWaitInput(input: ComputerToolInput): void {
  const candidates = [
    input.ref,
    input.text,
    input.app,
    input.windowId,
    input.windowTitle,
  ];
  const targetCount = candidates.filter(
    (candidate) => candidate !== undefined,
  ).length;

  if (targetCount === 0 && input.timeoutMs === undefined) {
    throw new Error(
      "computer wait requires one of ref, text, app, windowId, windowTitle, or timeoutMs",
    );
  }

  if (targetCount > 1) {
    throw new Error(
      "computer wait requires exactly one of ref, text, app, windowId, or windowTitle",
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Enforce the per-action requirements that the schema cannot express,
 * because every field other than `action` is optional there.
 *
 * @throws Error describing the first missing or inconsistent field.
 */
function validateComputerInput(input: ComputerToolInput): void {
  // A ref is only meaningful together with the snapshot it came from.
  const requireSnapshotForRef = (): void => {
    if (input.ref !== undefined && input.snapshotId === undefined) {
      throw new Error(`computer ${input.action} with ref requires snapshotId`);
    }
  };

  switch (input.action) {
    // Actions without any required field.
    case "observe":
    case "app_list":
    case "window_list":
    case "clipboard_read":
      break;

    case "click":
      if (!hasRefTarget(input) && !hasCoordinateTarget(input)) {
        throw new Error(
          "computer click requires snapshotId and ref, or explicit x and y coordinates",
        );
      }
      break;

    case "type":
      if (input.text === undefined) {
        throw new Error("computer type requires text");
      }
      requireSnapshotForRef();
      break;

    case "hotkey":
      if (!input.keys || input.keys.length === 0) {
        throw new Error("computer hotkey requires keys");
      }
      break;

    case "scroll":
      if (input.amount === undefined || input.amount === 0) {
        throw new Error("computer scroll requires a non-zero amount");
      }
      requireSnapshotForRef();
      break;

    case "drag":
      if (!hasRefTarget(input) && !hasCoordinateTarget(input)) {
        throw new Error(
          "computer drag requires a starting target via snapshotId and ref, or x and y coordinates",
        );
      }
      if (!hasDragDestination(input)) {
        throw new Error(
          "computer drag requires a destination via toRef, or explicit toX and toY coordinates",
        );
      }
      if (input.toRef !== undefined && input.snapshotId === undefined) {
        throw new Error("computer drag with toRef requires snapshotId");
      }
      break;

    case "wait":
      validateWaitInput(input);
      requireSnapshotForRef();
      break;

    case "app_open":
    case "app_focus":
      if (!input.app) {
        throw new Error(`computer ${input.action} requires app`);
      }
      break;

    // All window actions need a window; move and resize need geometry too.
    case "window_focus":
    case "window_close":
    case "window_move":
    case "window_resize":
      if (!hasWindowTarget(input)) {
        throw new Error(
          `computer ${input.action} requires windowId or windowTitle`,
        );
      }
      if (
        input.action === "window_move" &&
        (input.x === undefined || input.y === undefined)
      ) {
        throw new Error("computer window_move requires x and y");
      }
      if (
        input.action === "window_resize" &&
        (input.width === undefined || input.height === undefined)
      ) {
        throw new Error("computer window_resize requires width and height");
      }
      break;

    case "clipboard_write":
      if (input.text === undefined) {
        throw new Error("computer clipboard_write requires text");
      }
      break;

    default: {
      // Compile-time exhaustiveness check; also guards unexpected runtime values.
      const unsupportedAction: never = input.action;
      throw new Error(`Unsupported computer action: ${unsupportedAction}`);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Human-readable messages for an action: `statusMessage` is shown while the
 * helper runs, `successMessage` is the fallback result text.
 */
function describeAction(input: ComputerToolInput): {
  statusMessage: string;
  successMessage: string;
} {
  const messages = (statusMessage: string, successMessage: string) => ({
    statusMessage,
    successMessage,
  });

  switch (input.action) {
    case "observe":
      return messages("Observing desktop...", "Captured desktop snapshot");
    case "click":
      return messages("Clicking desktop target...", "Clicked desktop target");
    case "type":
      return messages("Typing into desktop...", "Typed into desktop");
    case "hotkey":
      return messages("Sending hotkey...", "Sent hotkey");
    case "scroll":
      return messages("Scrolling desktop...", "Scrolled desktop");
    case "drag":
      return messages("Dragging desktop target...", "Dragged desktop target");
    case "wait":
      return messages(
        "Waiting for desktop state...",
        "Desktop wait condition satisfied",
      );
    case "app_list":
      return messages("Listing apps...", "Listed apps");
    case "app_open":
      return messages(`Opening app ${input.app}...`, `Opened app ${input.app}`);
    case "app_focus":
      return messages(
        `Focusing app ${input.app}...`,
        `Focused app ${input.app}`,
      );
    case "window_list":
      return messages("Listing windows...", "Listed windows");
    case "window_focus":
      return messages("Focusing window...", "Focused window");
    case "window_move":
      return messages("Moving window...", "Moved window");
    case "window_resize":
      return messages("Resizing window...", "Resized window");
    case "window_close":
      return messages("Closing window...", "Closed window");
    case "clipboard_read":
      return messages("Reading clipboard...", "Read clipboard");
    case "clipboard_write":
      return messages("Writing clipboard...", "Wrote clipboard");
  }
}
|
||||||
|
|
||||||
|
/**
 * Validate a tool call and turn it into the helper invocation.
 *
 * Ensures the state directory exists, then passes the whole input to the
 * helper as a single JSON argument after `--input`.
 *
 * @throws Error when the input fails validateComputerInput.
 */
function buildComputerCommand(
  cwd: string,
  input: ComputerToolInput,
  options?: ComputerToolOptions,
): ComputerCommandContext {
  validateComputerInput(input);

  const stateDir = getComputerStateDir(cwd, options);
  ensureComputerDir(stateDir);

  const { statusMessage, successMessage } = describeAction(input);
  const args = ["--state-dir", stateDir, "--input", JSON.stringify(input)];

  return {
    action: input.action,
    args,
    statusMessage,
    successMessage,
    stateDir,
  };
}
|
||||||
|
|
||||||
|
/**
 * Compose the error text for a failed helper run: the helper's own output
 * (if any) first, then a line naming the action and its exit code. A `null`
 * exit code is reported as a plain failure.
 */
function buildComputerErrorMessage(
  action: ComputerToolAction,
  output: string,
  exitCode: number | null,
): string {
  let summary: string;
  if (exitCode === null) {
    summary = `Computer action "${action}" failed`;
  } else {
    summary = `Computer action "${action}" exited with code ${exitCode}`;
  }

  if (output.length === 0) {
    return summary;
  }
  return `${output}\n\n${summary}`;
}
|
||||||
|
|
||||||
|
/**
 * Multi-line guidance shown when the helper executable cannot be found,
 * naming the command that was attempted.
 */
function getMissingComputerCommandMessage(command: string): string {
  const notFound = `Computer tool could not find "${command}".`;
  const sandboxHint =
    "Desktop sandboxes install agent-computer alongside the browser tool.";
  const localHint =
    "If you are running locally, either install the helper or omit the computer tool.";
  const setupHint =
    "Recommended setup inside a sandbox image: copy agent-computer into /usr/local/bin and install xdotool, wmctrl, tesseract-ocr, and xclip.";

  return `${notFound}\n${sandboxHint}\n${localHint}\n${setupHint}`;
}
|
||||||
|
|
||||||
|
/**
 * Interpret the helper's output.
 *
 * When the output is valid JSON it is re-serialized with two-space
 * indentation, and `snapshotId` / `screenshotPath` are extracted, preferring
 * the nested `snapshot` object over the top-level fields. Anything that is
 * not valid JSON is returned verbatim as `text`.
 *
 * The parsed value is treated as untrusted: a field is only reported when it
 * is actually a string, so a malformed payload cannot leak non-string values
 * into fields typed as `string`, and a malformed nested value no longer
 * hides a valid top-level one.
 *
 * @param output Trimmed helper output (may be empty).
 * @returns The text to show plus any snapshot metadata found.
 */
function parseComputerPayload(output: string): {
  text: string;
  snapshotId?: string;
  screenshotPath?: string;
} {
  if (output.length === 0) {
    return { text: "" };
  }

  let payload: unknown;
  try {
    payload = JSON.parse(output);
  } catch {
    // Not JSON: pass the raw output through unchanged.
    return { text: output };
  }

  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : undefined;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const root = asRecord(payload);
  const snapshot = asRecord(root?.snapshot);

  return {
    text: JSON.stringify(payload, null, 2),
    snapshotId: asString(snapshot?.snapshotId) ?? asString(root?.snapshotId),
    screenshotPath:
      asString(snapshot?.screenshotPath) ?? asString(root?.screenshotPath),
  };
}
|
||||||
|
|
||||||
|
/**
 * Create the `computer` agent tool bound to a working directory.
 *
 * Each call validates the input, emits a progress update, runs the helper
 * executable with the input serialized as JSON, and returns the helper's
 * output (pretty-printed when it is JSON) along with structured details.
 *
 * Only failures of the helper *invocation* (missing executable, abort,
 * timeout) are translated into friendlier messages. The non-zero-exit error
 * is raised outside that translation step, because its message begins with
 * the helper's own output. Helper output starting with "timeout:" would
 * otherwise be mistaken for an invocation timeout and produce a garbled
 * message.
 *
 * @param cwd Working directory for the helper; also the base for a relative
 *   `stateDir`.
 * @param options Optional overrides for backend, command, timeout, and
 *   state location.
 * @returns The tool definition to register with the agent.
 */
export function createComputerTool(
  cwd: string,
  options?: ComputerToolOptions,
): AgentTool<typeof computerSchema> {
  const operations = options?.operations ?? defaultComputerOperations;
  const command = options?.command ?? DEFAULT_COMPUTER_COMMAND;
  const defaultTimeoutSeconds =
    options?.defaultTimeoutSeconds ?? DEFAULT_COMPUTER_TIMEOUT_SECONDS;

  return {
    name: "computer",
    label: "computer",
    description:
      "Use the desktop computer when browser DOM control is not enough: observe the screen, interact with windows and apps, type, click, drag, scroll, wait for native UI changes, and read or write the clipboard.",
    parameters: computerSchema,
    execute: async (_toolCallId, input, signal, onUpdate) => {
      const commandContext = buildComputerCommand(cwd, input, options);
      const details: ComputerToolDetails = {
        action: commandContext.action,
        command,
        args: commandContext.args,
        stateDir: commandContext.stateDir,
      };

      onUpdate?.({
        content: [{ type: "text", text: commandContext.statusMessage }],
        details,
      });

      const chunks: Buffer[] = [];
      let exitCode: number | null;

      try {
        ({ exitCode } = await operations.exec(command, commandContext.args, {
          cwd,
          env: getShellEnv(),
          onData: (data) => chunks.push(data),
          signal,
          timeout: defaultTimeoutSeconds,
        }));
      } catch (error) {
        // Translate invocation failures only; anything else is rethrown as-is.
        if (
          error instanceof Error &&
          "code" in error &&
          error.code === "ENOENT"
        ) {
          throw new Error(getMissingComputerCommandMessage(command));
        }
        if (error instanceof Error && error.message === "aborted") {
          throw new Error(`Computer action "${commandContext.action}" aborted`);
        }
        if (error instanceof Error && error.message.startsWith("timeout:")) {
          // The backend encodes the limit as "timeout:<seconds>".
          const seconds = error.message.split(":")[1];
          throw new Error(
            `Computer action "${commandContext.action}" timed out after ${seconds} seconds`,
          );
        }
        throw error;
      }

      const output = normalizeOutput(chunks);
      if (exitCode !== 0) {
        throw new Error(
          buildComputerErrorMessage(commandContext.action, output, exitCode),
        );
      }

      const parsed = parseComputerPayload(output);
      if (parsed.snapshotId) {
        details.snapshotId = parsed.snapshotId;
      }
      if (parsed.screenshotPath) {
        details.screenshotPath = parsed.screenshotPath;
      }

      return {
        content: [
          {
            type: "text",
            // Fall back to the canned message when the helper printed nothing.
            text:
              parsed.text.length > 0
                ? parsed.text
                : commandContext.successMessage,
          },
        ],
        details,
      };
    },
  };
}
|
||||||
|
|
||||||
|
/** Ready-made computer tool bound to the process working directory at module load. */
export const computerTool = createComputerTool(process.cwd());
|
||||||
|
|
@ -19,6 +19,16 @@ export {
|
||||||
browserTool,
|
browserTool,
|
||||||
createBrowserTool,
|
createBrowserTool,
|
||||||
} from "./browser.js";
|
} from "./browser.js";
|
||||||
|
export {
|
||||||
|
type ComputerObservationMode,
|
||||||
|
type ComputerOperations,
|
||||||
|
type ComputerToolAction,
|
||||||
|
type ComputerToolDetails,
|
||||||
|
type ComputerToolInput,
|
||||||
|
type ComputerToolOptions,
|
||||||
|
computerTool,
|
||||||
|
createComputerTool,
|
||||||
|
} from "./computer.js";
|
||||||
export {
|
export {
|
||||||
createEditTool,
|
createEditTool,
|
||||||
type EditOperations,
|
type EditOperations,
|
||||||
|
|
@ -84,6 +94,11 @@ import {
|
||||||
createBrowserTool,
|
createBrowserTool,
|
||||||
type BrowserToolOptions,
|
type BrowserToolOptions,
|
||||||
} from "./browser.js";
|
} from "./browser.js";
|
||||||
|
import {
|
||||||
|
computerTool,
|
||||||
|
createComputerTool,
|
||||||
|
type ComputerToolOptions,
|
||||||
|
} from "./computer.js";
|
||||||
import { createEditTool, editTool } from "./edit.js";
|
import { createEditTool, editTool } from "./edit.js";
|
||||||
import { createFindTool, findTool } from "./find.js";
|
import { createFindTool, findTool } from "./find.js";
|
||||||
import { createGrepTool, grepTool } from "./grep.js";
|
import { createGrepTool, grepTool } from "./grep.js";
|
||||||
|
|
@ -102,6 +117,7 @@ export const allTools = {
|
||||||
read: readTool,
|
read: readTool,
|
||||||
bash: bashTool,
|
bash: bashTool,
|
||||||
browser: browserTool,
|
browser: browserTool,
|
||||||
|
computer: computerTool,
|
||||||
edit: editTool,
|
edit: editTool,
|
||||||
write: writeTool,
|
write: writeTool,
|
||||||
grep: grepTool,
|
grep: grepTool,
|
||||||
|
|
@ -115,6 +131,7 @@ export const defaultCodingToolNames: ToolName[] = [
|
||||||
"read",
|
"read",
|
||||||
"bash",
|
"bash",
|
||||||
"browser",
|
"browser",
|
||||||
|
"computer",
|
||||||
"edit",
|
"edit",
|
||||||
"write",
|
"write",
|
||||||
];
|
];
|
||||||
|
|
@ -131,19 +148,16 @@ export interface ToolsOptions {
|
||||||
bash?: BashToolOptions;
|
bash?: BashToolOptions;
|
||||||
/** Options for the browser tool */
|
/** Options for the browser tool */
|
||||||
browser?: BrowserToolOptions;
|
browser?: BrowserToolOptions;
|
||||||
|
/** Options for the computer tool */
|
||||||
|
computer?: ComputerToolOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create coding tools configured for a specific working directory.
|
* Create coding tools configured for a specific working directory.
|
||||||
*/
|
*/
|
||||||
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
|
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
|
||||||
return [
|
const tools = createAllTools(cwd, options);
|
||||||
createReadTool(cwd, options?.read),
|
return defaultCodingToolNames.map((toolName) => tools[toolName]);
|
||||||
createBashTool(cwd, options?.bash),
|
|
||||||
createBrowserTool(cwd, options?.browser),
|
|
||||||
createEditTool(cwd),
|
|
||||||
createWriteTool(cwd),
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -172,6 +186,7 @@ export function createAllTools(
|
||||||
read: createReadTool(cwd, options?.read),
|
read: createReadTool(cwd, options?.read),
|
||||||
bash: createBashTool(cwd, options?.bash),
|
bash: createBashTool(cwd, options?.bash),
|
||||||
browser: createBrowserTool(cwd, options?.browser),
|
browser: createBrowserTool(cwd, options?.browser),
|
||||||
|
computer: createComputerTool(cwd, options?.computer),
|
||||||
edit: createEditTool(cwd),
|
edit: createEditTool(cwd),
|
||||||
write: createWriteTool(cwd),
|
write: createWriteTool(cwd),
|
||||||
grep: createGrepTool(cwd),
|
grep: createGrepTool(cwd),
|
||||||
|
|
|
||||||
|
|
@ -182,6 +182,7 @@ export {
|
||||||
createAgentSession,
|
createAgentSession,
|
||||||
createBashTool,
|
createBashTool,
|
||||||
createBrowserTool,
|
createBrowserTool,
|
||||||
|
createComputerTool,
|
||||||
// Tool factories (for custom cwd)
|
// Tool factories (for custom cwd)
|
||||||
createCodingTools,
|
createCodingTools,
|
||||||
createEditTool,
|
createEditTool,
|
||||||
|
|
@ -253,6 +254,13 @@ export {
|
||||||
type BrowserToolInput,
|
type BrowserToolInput,
|
||||||
type BrowserToolOptions,
|
type BrowserToolOptions,
|
||||||
browserTool,
|
browserTool,
|
||||||
|
type ComputerObservationMode,
|
||||||
|
type ComputerOperations,
|
||||||
|
type ComputerToolAction,
|
||||||
|
type ComputerToolDetails,
|
||||||
|
type ComputerToolInput,
|
||||||
|
type ComputerToolOptions,
|
||||||
|
computerTool,
|
||||||
codingTools,
|
codingTools,
|
||||||
defaultCodingToolNames,
|
defaultCodingToolNames,
|
||||||
DEFAULT_MAX_BYTES,
|
DEFAULT_MAX_BYTES,
|
||||||
|
|
|
||||||
175
packages/coding-agent/test/computer-tool.test.ts
Normal file
175
packages/coding-agent/test/computer-tool.test.ts
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
import { mkdtempSync, rmSync } from "node:fs";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { afterEach, describe, expect, it } from "vitest";
|
||||||
|
import { parseArgs } from "../src/cli/args.js";
|
||||||
|
import { buildSystemPrompt } from "../src/core/system-prompt.js";
|
||||||
|
import {
|
||||||
|
type ComputerOperations,
|
||||||
|
type ComputerToolDetails,
|
||||||
|
createAllTools,
|
||||||
|
createComputerTool,
|
||||||
|
defaultCodingToolNames,
|
||||||
|
} from "../src/core/tools/index.js";
|
||||||
|
|
||||||
|
interface TextBlock {
|
||||||
|
type: "text";
|
||||||
|
text: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolContentBlock = TextBlock | { type: string };
|
||||||
|
|
||||||
|
interface ToolResultLike {
|
||||||
|
content: ToolContentBlock[];
|
||||||
|
details?: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ComputerExecCall {
|
||||||
|
command: string;
|
||||||
|
args: string[];
|
||||||
|
cwd: string;
|
||||||
|
env: NodeJS.ProcessEnv;
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getTextOutput(result: ToolResultLike): string {
|
||||||
|
return result.content
|
||||||
|
.filter((block): block is TextBlock => block.type === "text")
|
||||||
|
.map((block) => block.text)
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
function createMockComputerOperations(
|
||||||
|
output = "",
|
||||||
|
exitCode: number | null = 0,
|
||||||
|
): {
|
||||||
|
calls: ComputerExecCall[];
|
||||||
|
operations: ComputerOperations;
|
||||||
|
} {
|
||||||
|
const calls: ComputerExecCall[] = [];
|
||||||
|
|
||||||
|
return {
|
||||||
|
calls,
|
||||||
|
operations: {
|
||||||
|
exec: async (command, args, options) => {
|
||||||
|
calls.push({
|
||||||
|
command,
|
||||||
|
args,
|
||||||
|
cwd: options.cwd,
|
||||||
|
env: options.env,
|
||||||
|
timeout: options.timeout,
|
||||||
|
});
|
||||||
|
if (output.length > 0) {
|
||||||
|
options.onData(Buffer.from(output, "utf-8"));
|
||||||
|
}
|
||||||
|
return { exitCode };
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("computer tool", () => {
|
||||||
|
const tempDirs: string[] = [];
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
while (tempDirs.length > 0) {
|
||||||
|
const tempDir = tempDirs.pop();
|
||||||
|
if (tempDir) {
|
||||||
|
rmSync(tempDir, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function createTempDir(prefix: string): string {
|
||||||
|
const tempDir = mkdtempSync(join(tmpdir(), prefix));
|
||||||
|
tempDirs.push(tempDir);
|
||||||
|
return tempDir;
|
||||||
|
}
|
||||||
|
|
||||||
|
it("observes the desktop through the agent-computer helper", async () => {
|
||||||
|
const cwd = createTempDir("coding-agent-computer-observe-");
|
||||||
|
const stateDir = join(cwd, "computer-state");
|
||||||
|
const { calls, operations } = createMockComputerOperations(
|
||||||
|
JSON.stringify({
|
||||||
|
ok: true,
|
||||||
|
action: "observe",
|
||||||
|
summary: "Captured desktop snapshot snap-1",
|
||||||
|
snapshot: {
|
||||||
|
snapshotId: "snap-1",
|
||||||
|
screenshotPath: "/tmp/snap-1.png",
|
||||||
|
backend: "hybrid",
|
||||||
|
activeWindow: null,
|
||||||
|
windows: [],
|
||||||
|
refs: [],
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const computerTool = createComputerTool(cwd, {
|
||||||
|
operations,
|
||||||
|
command: "agent-computer-test",
|
||||||
|
stateDir,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = (await computerTool.execute("computer-observe", {
|
||||||
|
action: "observe",
|
||||||
|
})) as ToolResultLike;
|
||||||
|
|
||||||
|
expect(calls).toHaveLength(1);
|
||||||
|
expect(calls[0]).toMatchObject({
|
||||||
|
command: "agent-computer-test",
|
||||||
|
args: ["--state-dir", stateDir, "--input", '{"action":"observe"}'],
|
||||||
|
cwd,
|
||||||
|
timeout: 90,
|
||||||
|
});
|
||||||
|
|
||||||
|
const details = result.details as ComputerToolDetails | undefined;
|
||||||
|
expect(details?.stateDir).toBe(stateDir);
|
||||||
|
expect(details?.snapshotId).toBe("snap-1");
|
||||||
|
expect(details?.screenshotPath).toBe("/tmp/snap-1.png");
|
||||||
|
expect(getTextOutput(result)).toContain('"snapshotId": "snap-1"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("validates click targets before spawning the helper", async () => {
|
||||||
|
const cwd = createTempDir("coding-agent-computer-click-");
|
||||||
|
const stateDir = join(cwd, "computer-state");
|
||||||
|
const { calls, operations } = createMockComputerOperations();
|
||||||
|
|
||||||
|
const computerTool = createComputerTool(cwd, {
|
||||||
|
operations,
|
||||||
|
stateDir,
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
computerTool.execute("computer-click-missing-target", {
|
||||||
|
action: "click",
|
||||||
|
}),
|
||||||
|
).rejects.toThrow(
|
||||||
|
"computer click requires snapshotId and ref, or explicit x and y coordinates",
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(calls).toHaveLength(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("accepts computer in --tools and exposes it in built-in tool wiring", () => {
|
||||||
|
const parsed = parseArgs(["--tools", "computer,read"]);
|
||||||
|
expect(parsed.tools).toEqual(["computer", "read"]);
|
||||||
|
|
||||||
|
expect(defaultCodingToolNames).toContain("computer");
|
||||||
|
expect(createAllTools(process.cwd()).computer.name).toBe("computer");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("mentions computer in the default system prompt", () => {
|
||||||
|
const prompt = buildSystemPrompt();
|
||||||
|
|
||||||
|
expect(prompt).toContain(
|
||||||
|
"- computer: Use the desktop computer: observe the screen",
|
||||||
|
);
|
||||||
|
expect(prompt).toContain(
|
||||||
|
"Computer: observe before interacting. Use it for native UI",
|
||||||
|
);
|
||||||
|
expect(prompt).toContain(
|
||||||
|
"Prefer browser for websites and DOM-aware tasks. Switch to computer",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -4,11 +4,10 @@ import { TmuxAdapter } from "./tmux-adapter";
|
||||||
|
|
||||||
describe("TmuxAdapter", () => {
|
describe("TmuxAdapter", () => {
|
||||||
let adapter: TmuxAdapter;
|
let adapter: TmuxAdapter;
|
||||||
let mockExecCommand: ReturnType<typeof vi.spyOn>;
|
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
adapter = new TmuxAdapter();
|
adapter = new TmuxAdapter();
|
||||||
mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
|
vi.spyOn(terminalAdapter, "execCommand");
|
||||||
delete process.env.TMUX;
|
delete process.env.TMUX;
|
||||||
delete process.env.ZELLIJ;
|
delete process.env.ZELLIJ;
|
||||||
delete process.env.WEZTERM_PANE;
|
delete process.env.WEZTERM_PANE;
|
||||||
|
|
@ -21,6 +20,7 @@ describe("TmuxAdapter", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it("detects tmux in headless runtimes when the binary is available", () => {
|
it("detects tmux in headless runtimes when the binary is available", () => {
|
||||||
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
mockExecCommand.mockReturnValue({
|
mockExecCommand.mockReturnValue({
|
||||||
stdout: "tmux 3.4",
|
stdout: "tmux 3.4",
|
||||||
stderr: "",
|
stderr: "",
|
||||||
|
|
@ -33,6 +33,7 @@ describe("TmuxAdapter", () => {
|
||||||
|
|
||||||
it("does not detect tmux in GUI terminals just because the binary exists", () => {
|
it("does not detect tmux in GUI terminals just because the binary exists", () => {
|
||||||
process.env.COLORTERM = "truecolor";
|
process.env.COLORTERM = "truecolor";
|
||||||
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
mockExecCommand.mockReturnValue({
|
mockExecCommand.mockReturnValue({
|
||||||
stdout: "tmux 3.4",
|
stdout: "tmux 3.4",
|
||||||
stderr: "",
|
stderr: "",
|
||||||
|
|
@ -44,7 +45,8 @@ describe("TmuxAdapter", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it("creates a detached team session when not already inside tmux", () => {
|
it("creates a detached team session when not already inside tmux", () => {
|
||||||
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
|
mockExecCommand.mockImplementation((_bin, args) => {
|
||||||
if (args[0] === "has-session") {
|
if (args[0] === "has-session") {
|
||||||
return { stdout: "", stderr: "missing", status: 1 };
|
return { stdout: "", stderr: "missing", status: 1 };
|
||||||
}
|
}
|
||||||
|
|
@ -65,12 +67,18 @@ describe("TmuxAdapter", () => {
|
||||||
|
|
||||||
expect(mockExecCommand).toHaveBeenCalledWith(
|
expect(mockExecCommand).toHaveBeenCalledWith(
|
||||||
"tmux",
|
"tmux",
|
||||||
expect.arrayContaining(["new-session", "-d", "-s", "companion-teams-demo"]),
|
expect.arrayContaining([
|
||||||
|
"new-session",
|
||||||
|
"-d",
|
||||||
|
"-s",
|
||||||
|
"companion-teams-demo",
|
||||||
|
]),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("splits an existing detached session when not already inside tmux", () => {
|
it("splits an existing detached session when not already inside tmux", () => {
|
||||||
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
|
mockExecCommand.mockImplementation((_bin, args) => {
|
||||||
if (args[0] === "has-session") {
|
if (args[0] === "has-session") {
|
||||||
return { stdout: "", stderr: "", status: 0 };
|
return { stdout: "", stderr: "", status: 0 };
|
||||||
}
|
}
|
||||||
|
|
@ -96,6 +104,7 @@ describe("TmuxAdapter", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it("checks pane liveness by pane id", () => {
|
it("checks pane liveness by pane id", () => {
|
||||||
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
mockExecCommand.mockReturnValue({
|
mockExecCommand.mockReturnValue({
|
||||||
stdout: "%1\n%7\n",
|
stdout: "%1\n%7\n",
|
||||||
stderr: "",
|
stderr: "",
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,10 @@ import { WezTermAdapter } from "./wezterm-adapter";
|
||||||
|
|
||||||
describe("WezTermAdapter", () => {
|
describe("WezTermAdapter", () => {
|
||||||
let adapter: WezTermAdapter;
|
let adapter: WezTermAdapter;
|
||||||
let mockExecCommand: ReturnType<typeof vi.spyOn>;
|
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
adapter = new WezTermAdapter();
|
adapter = new WezTermAdapter();
|
||||||
mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
|
vi.spyOn(terminalAdapter, "execCommand");
|
||||||
delete process.env.WEZTERM_PANE;
|
delete process.env.WEZTERM_PANE;
|
||||||
delete process.env.TMUX;
|
delete process.env.TMUX;
|
||||||
delete process.env.ZELLIJ;
|
delete process.env.ZELLIJ;
|
||||||
|
|
@ -31,6 +30,7 @@ describe("WezTermAdapter", () => {
|
||||||
|
|
||||||
describe("detect", () => {
|
describe("detect", () => {
|
||||||
it("should detect when WEZTERM_PANE is set", () => {
|
it("should detect when WEZTERM_PANE is set", () => {
|
||||||
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
mockExecCommand.mockReturnValue({
|
mockExecCommand.mockReturnValue({
|
||||||
stdout: "version 1.0",
|
stdout: "version 1.0",
|
||||||
stderr: "",
|
stderr: "",
|
||||||
|
|
@ -43,7 +43,8 @@ describe("WezTermAdapter", () => {
|
||||||
describe("spawn", () => {
|
describe("spawn", () => {
|
||||||
it("should spawn first pane to the right with 50%", () => {
|
it("should spawn first pane to the right with 50%", () => {
|
||||||
// Mock getPanes finding only current pane
|
// Mock getPanes finding only current pane
|
||||||
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
|
mockExecCommand.mockImplementation((_bin, args) => {
|
||||||
if (args.includes("list")) {
|
if (args.includes("list")) {
|
||||||
return {
|
return {
|
||||||
stdout: JSON.stringify([{ pane_id: 0, tab_id: 0 }]),
|
stdout: JSON.stringify([{ pane_id: 0, tab_id: 0 }]),
|
||||||
|
|
@ -79,7 +80,8 @@ describe("WezTermAdapter", () => {
|
||||||
|
|
||||||
it("should spawn subsequent panes by splitting the sidebar", () => {
|
it("should spawn subsequent panes by splitting the sidebar", () => {
|
||||||
// Mock getPanes finding current pane (0) and sidebar pane (1)
|
// Mock getPanes finding current pane (0) and sidebar pane (1)
|
||||||
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
|
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
|
||||||
|
mockExecCommand.mockImplementation((_bin, args) => {
|
||||||
if (args.includes("list")) {
|
if (args.includes("list")) {
|
||||||
return {
|
return {
|
||||||
stdout: JSON.stringify([
|
stdout: JSON.stringify([
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue