Merge pull request #305 from getcompanion-ai/computer

computer use tool
This commit is contained in:
Hari 2026-03-11 15:01:34 -04:00 committed by GitHub
commit a5d70ce55e
9 changed files with 1086 additions and 17 deletions

View file

@ -325,6 +325,7 @@ ${chalk.bold("Environment Variables:")}
AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1)
${ENV_AGENT_DIR.padEnd(32)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent)
COMPANION_PACKAGE_DIR - Override package directory (for Nix/Guix store paths)
COMPANION_AGENT_COMPUTER_COMMAND - Override the computer helper command (default: agent-computer)
COMPANION_OFFLINE - Disable startup network operations when set to 1/true/yes
COMPANION_SHARE_VIEWER_URL - Base URL for /share command (default: https://companion.dev/session/)
COMPANION_AI_ANTIGRAVITY_VERSION - Override Antigravity User-Agent version (e.g., 1.23.0)
@ -333,6 +334,7 @@ ${chalk.bold(`Available Tools (default: ${defaultToolsText}):`)}
read - Read file contents
bash - Execute bash commands
browser - Browser automation with persistent state
computer - Desktop computer automation with screen observation and native UI control
edit - Edit files with find/replace
write - Write files (creates/overwrites)
grep - Search file contents (read-only, off by default)

View file

@ -26,10 +26,12 @@ import {
allTools,
bashTool,
browserTool,
computerTool,
codingTools,
defaultCodingToolNames,
createBashTool,
createBrowserTool,
createComputerTool,
createCodingTools,
createEditTool,
createFindTool,
@ -67,7 +69,7 @@ export interface CreateAgentSessionOptions {
/** Models available for cycling (Ctrl+P in interactive mode) */
scopedModels?: Array<{ model: Model<any>; thinkingLevel?: ThinkingLevel }>;
/** Built-in tools to use. Default: codingTools [read, bash, browser, edit, write] */
/** Built-in tools to use. Default: codingTools [read, bash, browser, computer, edit, write] */
tools?: Tool[];
/** Custom tools to register (in addition to built-in tools). */
customTools?: ToolDefinition[];
@ -113,6 +115,7 @@ export {
readTool,
bashTool,
browserTool,
computerTool,
editTool,
writeTool,
grepTool,
@ -127,6 +130,7 @@ export {
createReadTool,
createBashTool,
createBrowserTool,
createComputerTool,
createEditTool,
createWriteTool,
createGrepTool,

View file

@ -11,6 +11,8 @@ const toolDescriptions: Record<string, string> = {
bash: "Run shell commands",
browser:
"Browse the web: open, snapshot, click, fill, wait, screenshot, save/load state",
computer:
"Use the desktop computer: observe the screen, click, type, send hotkeys, manage apps/windows, wait for native UI, and read/write the clipboard",
edit: "Surgical file edits (find exact text, replace it)",
write: "Create new files or completely rewrite existing ones",
grep: "Search file contents by regex (respects .gitignore)",
@ -167,6 +169,7 @@ export function buildSystemPrompt(
const hasBash = tools.includes("bash");
const hasBrowser = tools.includes("browser");
const hasComputer = tools.includes("computer");
const hasEdit = tools.includes("edit");
const hasWrite = tools.includes("write");
const hasGrep = tools.includes("grep");
@ -215,6 +218,16 @@ export function buildSystemPrompt(
"Browser: snapshot before interacting with elements. Use it for research and learning too, not just automation",
);
}
if (hasComputer) {
addGuideline(
"Computer: observe before interacting. Use it for native UI, desktop apps, file pickers, downloads, and OS dialogs",
);
}
if (hasBrowser && hasComputer) {
addGuideline(
"Prefer browser for websites and DOM-aware tasks. Switch to computer when native UI or desktop state matters",
);
}
// Output hygiene
if (hasEdit || hasWrite) {

View file

@ -0,0 +1,677 @@
import { spawn } from "node:child_process";
import { mkdirSync } from "node:fs";
import { join, resolve } from "node:path";
import type { AgentTool } from "@mariozechner/companion-agent-core";
import { type Static, Type } from "@sinclair/typebox";
import { getAgentDir } from "../../config.js";
import {
getShellEnv,
killProcessTree,
sanitizeBinaryOutput,
} from "../../utils/shell.js";
/**
 * Every action name the computer tool accepts. The schema's `action` union
 * and the `ComputerToolAction` type are both derived from this tuple.
 */
const computerActions = [
"observe",
"click",
"type",
"hotkey",
"scroll",
"drag",
"wait",
"app_list",
"app_open",
"app_focus",
"window_list",
"window_focus",
"window_move",
"window_resize",
"window_close",
"clipboard_read",
"clipboard_write",
] as const;
/** Observation modes accepted by the schema's optional `mode` field. */
const computerObservationModes = ["hybrid", "ocr"] as const;
/**
 * Snapshot IDs must consist solely of ASCII letters, digits, `_` and `-`.
 * Anything else (dots, slashes, ...) is rejected by `validateSnapshotId`.
 */
const computerSnapshotIdPattern = /^[A-Za-z0-9_-]+$/;
/**
 * Helper executable spawned for each action. Read once at module load from
 * COMPANION_AGENT_COMPUTER_COMMAND; `||` means an empty value also falls back
 * to "agent-computer".
 */
const DEFAULT_COMPUTER_COMMAND =
process.env.COMPANION_AGENT_COMPUTER_COMMAND || "agent-computer";
/** Default per-invocation timeout, in seconds, handed to `operations.exec`. */
const DEFAULT_COMPUTER_TIMEOUT_SECONDS = 90;
/**
 * TypeBox schema for the computer tool's input.
 *
 * Only `action` is required here; every other field is optional at the schema
 * level. Which fields a given action actually needs is enforced separately by
 * `validateComputerInput` before the helper is spawned.
 */
const computerSchema = Type.Object({
action: Type.Union(
computerActions.map((action) => Type.Literal(action)),
{ description: "Computer action to execute" },
),
snapshotId: Type.Optional(
Type.String({ description: "Snapshot ID returned from observe" }),
),
ref: Type.Optional(
Type.String({
description:
"Target ref from observe output, such as w1 for a window or t3 for OCR text",
}),
),
x: Type.Optional(Type.Number({ description: "Target x coordinate" })),
y: Type.Optional(Type.Number({ description: "Target y coordinate" })),
toRef: Type.Optional(
Type.String({ description: "Destination ref for drag actions" }),
),
toX: Type.Optional(
Type.Number({ description: "Destination x coordinate for drag actions" }),
),
toY: Type.Optional(
Type.Number({ description: "Destination y coordinate for drag actions" }),
),
text: Type.Optional(
Type.String({
description:
"Text to type, text to wait for, or clipboard contents depending on action",
}),
),
keys: Type.Optional(
Type.Array(Type.String(), {
description: "Hotkey chord or key sequence, for example ['ctrl', 'l']",
minItems: 1,
}),
),
app: Type.Optional(
Type.String({
description:
"Installed app or running app name/class for app_open, app_focus, and wait",
}),
),
windowId: Type.Optional(
Type.String({ description: "Window ID, such as 0x04200007" }),
),
windowTitle: Type.Optional(
Type.String({ description: "Window title substring to match" }),
),
mode: Type.Optional(
Type.Union(
computerObservationModes.map((mode) => Type.Literal(mode)),
{ description: "Observation mode. Defaults to hybrid." },
),
),
amount: Type.Optional(
Type.Number({
description:
"Scroll amount in wheel steps. Positive scrolls down/right, negative scrolls up/left.",
}),
),
width: Type.Optional(
Type.Number({ description: "Target window width for resize actions" }),
),
height: Type.Optional(
Type.Number({ description: "Target window height for resize actions" }),
),
clear: Type.Optional(
Type.Boolean({
description: "Clear the active input field before typing",
}),
),
button: Type.Optional(
Type.Number({
description: "Mouse button for click or drag. Defaults to 1.",
minimum: 1,
maximum: 7,
}),
),
// NOTE(review): timeoutMs is forwarded to the helper inside the JSON input; the
// process-level timeout used by this module is a separate value in seconds.
timeoutMs: Type.Optional(
Type.Number({
description: "Wait timeout in milliseconds for observe-derived waits",
minimum: 0,
}),
),
intervalMs: Type.Optional(
Type.Number({
description: "Polling interval for wait actions in milliseconds",
minimum: 10,
}),
),
});
/** Union of the literal action names listed in `computerActions`. */
export type ComputerToolAction = (typeof computerActions)[number];
/** Union of the literal mode names listed in `computerObservationModes`. */
export type ComputerObservationMode = (typeof computerObservationModes)[number];
/** Static TypeScript shape of an input object matching `computerSchema`. */
export type ComputerToolInput = Static<typeof computerSchema>;
/**
 * Metadata attached to every progress update and result of the computer tool.
 */
export interface ComputerToolDetails {
/** Action that was requested. */
action: ComputerToolAction;
/** Helper executable that was (or is about to be) invoked. */
command: string;
/** Argument vector passed to the helper. */
args: string[];
/** Absolute state directory handed to the helper via `--state-dir`. */
stateDir: string;
/** Snapshot ID extracted from the helper's JSON output, when present. */
snapshotId?: string;
/** Screenshot path extracted from the helper's JSON output, when present. */
screenshotPath?: string;
}
/**
 * Pluggable process runner used by the computer tool, so that the helper
 * invocation can be replaced (for example by a mock in tests).
 */
export interface ComputerOperations {
/**
 * Run `command` with `args` and resolve with its exit code.
 *
 * - `onData` receives output chunks; the default implementation feeds it
 *   both stdout and stderr.
 * - `timeout` is expressed in seconds (the default implementation multiplies
 *   it by 1000 before arming a timer).
 * - The default implementation rejects with `Error("aborted")` when `signal`
 *   is aborted and with `Error("timeout:<seconds>")` when the timer fires;
 *   `createComputerTool` recognises those two messages.
 */
exec: (
command: string,
args: string[],
options: {
cwd: string;
env: NodeJS.ProcessEnv;
onData: (data: Buffer) => void;
signal?: AbortSignal;
timeout?: number;
},
) => Promise<{ exitCode: number | null }>;
}
/**
 * Default {@link ComputerOperations} implementation backed by
 * `child_process.spawn`.
 *
 * The helper is started detached with stdin ignored; stdout and stderr are
 * both forwarded to `onData`. The process tree is killed when the optional
 * timeout (seconds) elapses or when `signal` aborts. The promise rejects with
 * `Error("aborted")` or `Error("timeout:<seconds>")` in those cases, rejects
 * with the spawn error when the process cannot be started, and otherwise
 * resolves with the exit code.
 */
const defaultComputerOperations: ComputerOperations = {
  exec: (command, args, { cwd, env, onData, signal, timeout }) => {
    return new Promise((resolvePromise, rejectPromise) => {
      const helper = spawn(command, args, {
        cwd,
        detached: true,
        env,
        stdio: ["ignore", "pipe", "pipe"],
      });

      // Shared by the timeout timer and the abort listener.
      const killHelper = (): void => {
        if (helper.pid) {
          killProcessTree(helper.pid);
        }
      };

      let deadlineHit = false;
      let deadline: NodeJS.Timeout | undefined;
      if (timeout !== undefined && timeout > 0) {
        deadline = setTimeout(() => {
          deadlineHit = true;
          killHelper();
        }, timeout * 1000);
      }

      helper.stdout?.on("data", onData);
      helper.stderr?.on("data", onData);

      if (signal) {
        if (signal.aborted) {
          // Already cancelled before we got here: tear the helper down now.
          killHelper();
        } else {
          signal.addEventListener("abort", killHelper, { once: true });
        }
      }

      // Undo the timer and the abort subscription once the process is done.
      const release = (): void => {
        if (deadline) clearTimeout(deadline);
        if (signal) signal.removeEventListener("abort", killHelper);
      };

      helper.on("error", (error) => {
        release();
        rejectPromise(error);
      });

      helper.on("close", (code) => {
        release();
        if (signal?.aborted) {
          rejectPromise(new Error("aborted"));
        } else if (deadlineHit) {
          rejectPromise(new Error(`timeout:${timeout}`));
        } else {
          resolvePromise({ exitCode: code });
        }
      });
    });
  },
};
/** Optional configuration accepted by `createComputerTool`. */
export interface ComputerToolOptions {
/** Replacement process runner; defaults to the spawn-based implementation. */
operations?: ComputerOperations;
/** Helper executable to run; defaults to `DEFAULT_COMPUTER_COMMAND`. */
command?: string;
/** Timeout in seconds for each helper run; defaults to `DEFAULT_COMPUTER_TIMEOUT_SECONDS`. */
defaultTimeoutSeconds?: number;
/** State directory (resolved against the tool's cwd); defaults to `<agentDir>/computer`. */
stateDir?: string;
/** Base agent directory used to derive the default state directory; defaults to `getAgentDir()`. */
agentDir?: string;
}
/** Everything `buildComputerCommand` prepares for a single helper invocation. */
interface ComputerCommandContext {
/** Action taken from the validated input. */
action: ComputerToolAction;
/** Argument vector for the helper (`--state-dir <dir> --input <json>`). */
args: string[];
/** Progress text emitted before the helper runs. */
statusMessage: string;
/** Fallback result text used when the helper prints nothing. */
successMessage: string;
/** Absolute state directory, created before the helper runs. */
stateDir: string;
}
/**
 * Turn `inputPath` into an absolute path, interpreting relative paths against
 * `cwd`. Absolute inputs are returned normalised and otherwise unchanged.
 */
function resolveCommandPath(cwd: string, inputPath: string): string {
  const absolutePath = resolve(cwd, inputPath);
  return absolutePath;
}
/**
 * Default location for computer-tool state: the `computer` subdirectory of
 * the configured agent directory, or of `getAgentDir()` when none is given.
 */
function getComputerRootDir(options?: ComputerToolOptions): string {
  return join(options?.agentDir ?? getAgentDir(), "computer");
}
/**
 * Absolute state directory for the helper: the explicit `options.stateDir`
 * when provided, otherwise the default computer root, resolved against `cwd`.
 */
function getComputerStateDir(
  cwd: string,
  options?: ComputerToolOptions,
): string {
  return resolveCommandPath(
    cwd,
    options?.stateDir ?? getComputerRootDir(options),
  );
}
/** Create `stateDir` (and any missing parents); a no-op if it already exists. */
function ensureComputerDir(stateDir: string): void {
  const creationOptions = { recursive: true } as const;
  mkdirSync(stateDir, creationOptions);
}
/**
 * Join the collected output chunks, decode them as UTF-8, pass the text
 * through `sanitizeBinaryOutput`, and trim surrounding whitespace.
 */
function normalizeOutput(chunks: Buffer[]): string {
  const combined = Buffer.concat(chunks);
  const decoded = combined.toString("utf-8");
  return sanitizeBinaryOutput(decoded).trim();
}
/** True only when both `x` and `y` are supplied. */
function hasCoordinateTarget(input: ComputerToolInput): boolean {
  const missingAxis = input.x === undefined || input.y === undefined;
  return !missingAxis;
}
/** True only when both `snapshotId` and `ref` are supplied. */
function hasRefTarget(input: ComputerToolInput): boolean {
  const { snapshotId, ref } = input;
  return !(snapshotId === undefined || ref === undefined);
}
/** True when a window is identified by `windowId`, `windowTitle`, or both. */
function hasWindowTarget(input: ComputerToolInput): boolean {
  const { windowId, windowTitle } = input;
  return !(windowId === undefined && windowTitle === undefined);
}
/**
 * True when a drag destination is given either as `toRef` or as a complete
 * `toX`/`toY` coordinate pair.
 */
function hasDragDestination(input: ComputerToolInput): boolean {
  if (input.toRef !== undefined) {
    return true;
  }
  return input.toX !== undefined && input.toY !== undefined;
}
/**
 * Reject snapshot IDs containing anything outside `computerSnapshotIdPattern`.
 *
 * @throws Error when `snapshotId` does not match the pattern.
 */
function validateSnapshotId(snapshotId: string): void {
  const isWellFormed = computerSnapshotIdPattern.test(snapshotId);
  if (isWellFormed) {
    return;
  }
  throw new Error(`Invalid computer snapshotId: "${snapshotId}"`);
}
/**
 * Validate the target selection of a `wait` action.
 *
 * At most one of `ref`, `text`, `app`, `windowId`, `windowTitle` may be set.
 * Having none of them is only acceptable when `timeoutMs` is provided.
 *
 * @throws Error when no target and no `timeoutMs` is given, or when more than
 *   one target is given.
 */
function validateWaitInput(input: ComputerToolInput): void {
  const candidates = [
    input.ref,
    input.text,
    input.app,
    input.windowId,
    input.windowTitle,
  ];
  const targetCount = candidates.filter(
    (candidate) => candidate !== undefined,
  ).length;

  if (targetCount === 0 && input.timeoutMs === undefined) {
    throw new Error(
      "computer wait requires one of ref, text, app, windowId, windowTitle, or timeoutMs",
    );
  }
  if (targetCount > 1) {
    throw new Error(
      "computer wait requires exactly one of ref, text, app, windowId, or windowTitle",
    );
  }
}
/**
 * Check that `input` carries the fields its action needs, before any process
 * is spawned.
 *
 * A supplied `snapshotId` is always validated first, regardless of action.
 * Within each action the checks run in a fixed order and the first failing
 * one determines the error message.
 *
 * @throws Error describing the first unmet requirement.
 */
function validateComputerInput(input: ComputerToolInput): void {
  if (input.snapshotId !== undefined) {
    validateSnapshotId(input.snapshotId);
  }

  const fail = (message: string): never => {
    throw new Error(message);
  };
  // Evaluated lazily so that only the checks relevant to the action run.
  const refLacksSnapshot = (): boolean =>
    input.ref !== undefined && input.snapshotId === undefined;
  const lacksPointerTarget = (): boolean =>
    !hasRefTarget(input) && !hasCoordinateTarget(input);

  switch (input.action) {
    case "observe":
    case "app_list":
    case "window_list":
    case "clipboard_read":
      // These actions take no mandatory arguments.
      break;

    case "click":
      if (lacksPointerTarget()) {
        fail(
          "computer click requires snapshotId and ref, or explicit x and y coordinates",
        );
      }
      break;

    case "type":
      if (input.text === undefined) {
        fail("computer type requires text");
      }
      if (refLacksSnapshot()) {
        fail("computer type with ref requires snapshotId");
      }
      break;

    case "hotkey":
      if (!input.keys || input.keys.length === 0) {
        fail("computer hotkey requires keys");
      }
      break;

    case "scroll":
      if (input.amount === undefined || input.amount === 0) {
        fail("computer scroll requires a non-zero amount");
      }
      if (refLacksSnapshot()) {
        fail("computer scroll with ref requires snapshotId");
      }
      break;

    case "drag":
      if (lacksPointerTarget()) {
        fail(
          "computer drag requires a starting target via snapshotId and ref, or x and y coordinates",
        );
      }
      if (!hasDragDestination(input)) {
        fail(
          "computer drag requires a destination via toRef, or explicit toX and toY coordinates",
        );
      }
      if (input.toRef !== undefined && input.snapshotId === undefined) {
        fail("computer drag with toRef requires snapshotId");
      }
      break;

    case "wait":
      validateWaitInput(input);
      if (refLacksSnapshot()) {
        fail("computer wait with ref requires snapshotId");
      }
      break;

    case "app_open":
    case "app_focus":
      if (!input.app) {
        fail(`computer ${input.action} requires app`);
      }
      break;

    case "window_focus":
    case "window_close":
      if (!hasWindowTarget(input)) {
        fail(`computer ${input.action} requires windowId or windowTitle`);
      }
      break;

    case "window_move":
      if (!hasWindowTarget(input)) {
        fail("computer window_move requires windowId or windowTitle");
      }
      if (input.x === undefined || input.y === undefined) {
        fail("computer window_move requires x and y");
      }
      break;

    case "window_resize":
      if (!hasWindowTarget(input)) {
        fail("computer window_resize requires windowId or windowTitle");
      }
      if (input.width === undefined || input.height === undefined) {
        fail("computer window_resize requires width and height");
      }
      break;

    case "clipboard_write":
      if (input.text === undefined) {
        fail("computer clipboard_write requires text");
      }
      break;

    default: {
      // Compile-time exhaustiveness guard: a new action must be handled above.
      const unsupportedAction: never = input.action;
      throw new Error(`Unsupported computer action: ${unsupportedAction}`);
    }
  }
}
/**
 * Human-readable messages for an action: `statusMessage` is shown while the
 * helper runs, `successMessage` is the fallback result text when the helper
 * prints nothing. Only `app_open` and `app_focus` interpolate input (`app`).
 */
function describeAction(input: ComputerToolInput): {
  statusMessage: string;
  successMessage: string;
} {
  const messages = (statusMessage: string, successMessage: string) => ({
    statusMessage,
    successMessage,
  });

  switch (input.action) {
    case "observe":
      return messages("Observing desktop...", "Captured desktop snapshot");
    case "click":
      return messages("Clicking desktop target...", "Clicked desktop target");
    case "type":
      return messages("Typing into desktop...", "Typed into desktop");
    case "hotkey":
      return messages("Sending hotkey...", "Sent hotkey");
    case "scroll":
      return messages("Scrolling desktop...", "Scrolled desktop");
    case "drag":
      return messages("Dragging desktop target...", "Dragged desktop target");
    case "wait":
      return messages(
        "Waiting for desktop state...",
        "Desktop wait condition satisfied",
      );
    case "app_list":
      return messages("Listing apps...", "Listed apps");
    case "app_open":
      return messages(`Opening app ${input.app}...`, `Opened app ${input.app}`);
    case "app_focus":
      return messages(
        `Focusing app ${input.app}...`,
        `Focused app ${input.app}`,
      );
    case "window_list":
      return messages("Listing windows...", "Listed windows");
    case "window_focus":
      return messages("Focusing window...", "Focused window");
    case "window_move":
      return messages("Moving window...", "Moved window");
    case "window_resize":
      return messages("Resizing window...", "Resized window");
    case "window_close":
      return messages("Closing window...", "Closed window");
    case "clipboard_read":
      return messages("Reading clipboard...", "Read clipboard");
    case "clipboard_write":
      return messages("Writing clipboard...", "Wrote clipboard");
  }
}
/**
 * Validate `input`, make sure the state directory exists, and assemble the
 * helper's argument vector together with its status/success messages.
 *
 * The input object is passed to the helper verbatim as JSON via `--input`.
 *
 * @throws Error when `input` fails validation (before any directory is made).
 */
function buildComputerCommand(
  cwd: string,
  input: ComputerToolInput,
  options?: ComputerToolOptions,
): ComputerCommandContext {
  validateComputerInput(input);

  const stateDir = getComputerStateDir(cwd, options);
  ensureComputerDir(stateDir);

  const { statusMessage, successMessage } = describeAction(input);
  const serializedInput = JSON.stringify(input);
  const args = ["--state-dir", stateDir, "--input", serializedInput];

  return {
    action: input.action,
    args,
    statusMessage,
    successMessage,
    stateDir,
  };
}
/**
 * Compose the error text for a helper run that did not exit with code 0.
 *
 * The summary line names the action and, when known, the exit code. Any
 * captured output is placed before the summary, separated by a blank line.
 */
function buildComputerErrorMessage(
  action: ComputerToolAction,
  output: string,
  exitCode: number | null,
): string {
  let summary = `Computer action "${action}" failed`;
  if (exitCode !== null) {
    summary = `Computer action "${action}" exited with code ${exitCode}`;
  }
  if (output.length === 0) {
    return summary;
  }
  return `${output}\n\n${summary}`;
}
/**
 * Multi-line guidance shown when the helper executable cannot be found,
 * naming the command that was attempted and how to install it.
 */
function getMissingComputerCommandMessage(command: string): string {
  const notFound = `Computer tool could not find "${command}".`;
  const sandboxNote =
    "Desktop sandboxes install agent-computer alongside the browser tool.";
  const localNote =
    "If you are running locally, either install the helper or omit the computer tool.";
  const setupNote =
    "Recommended setup inside a sandbox image: copy agent-computer into /usr/local/bin and install xdotool, wmctrl, tesseract-ocr, and xclip.";
  return `${notFound}\n${sandboxNote}\n${localNote}\n${setupNote}`;
}
/**
 * Interpret the helper's combined output.
 *
 * - Empty output yields an empty `text`.
 * - Output that is not valid JSON is returned unchanged as `text`.
 * - Valid JSON is pretty-printed into `text`; `snapshotId` and
 *   `screenshotPath` are taken from `payload.snapshot` first and from the top
 *   level second.
 *
 * The helper's output is untrusted, so the two identifiers are only returned
 * when they are actually strings. A number or object in those positions is
 * ignored instead of leaking into fields that are typed as `string`.
 *
 * @param output Trimmed helper output.
 * @returns Display text plus any snapshot metadata that could be extracted.
 */
function parseComputerPayload(output: string): {
  text: string;
  snapshotId?: string;
  screenshotPath?: string;
} {
  if (output.length === 0) {
    return { text: "" };
  }

  let payload: unknown;
  try {
    payload = JSON.parse(output);
  } catch {
    return { text: output };
  }

  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null
      ? (value as Record<string, unknown>)
      : undefined;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const root = asRecord(payload);
  const snapshot = asRecord(root?.snapshot);

  return {
    text: JSON.stringify(payload, null, 2),
    snapshotId: asString(snapshot?.snapshotId) ?? asString(root?.snapshotId),
    screenshotPath:
      asString(snapshot?.screenshotPath) ?? asString(root?.screenshotPath),
  };
}
/**
 * Create the `computer` agent tool bound to a working directory.
 *
 * Each `execute` call validates the input, emits a progress update, runs the
 * helper through `operations.exec`, and returns the helper's output (pretty
 * printed when it is JSON) together with {@link ComputerToolDetails}.
 *
 * Errors:
 * - invalid input: the validation error, raised before anything is spawned;
 * - helper not found (`ENOENT`): installation guidance;
 * - abort / timeout signalled by the runner: a message naming the action;
 * - non-zero exit: the helper's output followed by the exit-code summary.
 *
 * Only failures raised by `operations.exec` itself are translated. The
 * non-zero-exit error is thrown outside that `try`, so helper output that
 * happens to begin with `timeout:` is reported verbatim instead of being
 * mistaken for a process-level timeout.
 *
 * @param cwd Working directory for the helper; also the base for a relative
 *   `options.stateDir`.
 * @param options Optional overrides for the runner, command, timeout and
 *   state directory.
 */
export function createComputerTool(
  cwd: string,
  options?: ComputerToolOptions,
): AgentTool<typeof computerSchema> {
  const operations = options?.operations ?? defaultComputerOperations;
  const command = options?.command ?? DEFAULT_COMPUTER_COMMAND;
  const defaultTimeoutSeconds =
    options?.defaultTimeoutSeconds ?? DEFAULT_COMPUTER_TIMEOUT_SECONDS;

  // Map runner-level failures (spawn error, abort, timeout) to user-facing
  // errors; anything unrecognised is passed through untouched.
  const translateExecError = (
    action: ComputerToolAction,
    error: unknown,
  ): unknown => {
    if (!(error instanceof Error)) {
      return error;
    }
    if ("code" in error && error.code === "ENOENT") {
      return new Error(getMissingComputerCommandMessage(command));
    }
    if (error.message === "aborted") {
      return new Error(`Computer action "${action}" aborted`);
    }
    if (error.message.startsWith("timeout:")) {
      const seconds = error.message.split(":")[1];
      return new Error(
        `Computer action "${action}" timed out after ${seconds} seconds`,
      );
    }
    return error;
  };

  return {
    name: "computer",
    label: "computer",
    description:
      "Use the desktop computer when browser DOM control is not enough: observe the screen, interact with windows and apps, type, click, drag, scroll, wait for native UI changes, and read or write the clipboard.",
    parameters: computerSchema,
    execute: async (_toolCallId, input, signal, onUpdate) => {
      const commandContext = buildComputerCommand(cwd, input, options);
      const details: ComputerToolDetails = {
        action: commandContext.action,
        command,
        args: commandContext.args,
        stateDir: commandContext.stateDir,
      };
      onUpdate?.({
        content: [{ type: "text", text: commandContext.statusMessage }],
        details,
      });

      const chunks: Buffer[] = [];
      let exitCode: number | null;
      try {
        ({ exitCode } = await operations.exec(command, commandContext.args, {
          cwd,
          env: getShellEnv(),
          onData: (data) => chunks.push(data),
          signal,
          timeout: defaultTimeoutSeconds,
        }));
      } catch (error) {
        throw translateExecError(commandContext.action, error);
      }

      const output = normalizeOutput(chunks);
      if (exitCode !== 0) {
        throw new Error(
          buildComputerErrorMessage(commandContext.action, output, exitCode),
        );
      }

      const parsed = parseComputerPayload(output);
      if (parsed.snapshotId) {
        details.snapshotId = parsed.snapshotId;
      }
      if (parsed.screenshotPath) {
        details.screenshotPath = parsed.screenshotPath;
      }
      return {
        content: [
          {
            type: "text",
            text:
              parsed.text.length > 0
                ? parsed.text
                : commandContext.successMessage,
          },
        ],
        details,
      };
    },
  };
}
/** Ready-made computer tool bound to the process's working directory at module load, with default options. */
export const computerTool = createComputerTool(process.cwd());

View file

@ -19,6 +19,16 @@ export {
browserTool,
createBrowserTool,
} from "./browser.js";
export {
type ComputerObservationMode,
type ComputerOperations,
type ComputerToolAction,
type ComputerToolDetails,
type ComputerToolInput,
type ComputerToolOptions,
computerTool,
createComputerTool,
} from "./computer.js";
export {
createEditTool,
type EditOperations,
@ -84,6 +94,11 @@ import {
createBrowserTool,
type BrowserToolOptions,
} from "./browser.js";
import {
computerTool,
createComputerTool,
type ComputerToolOptions,
} from "./computer.js";
import { createEditTool, editTool } from "./edit.js";
import { createFindTool, findTool } from "./find.js";
import { createGrepTool, grepTool } from "./grep.js";
@ -102,6 +117,7 @@ export const allTools = {
read: readTool,
bash: bashTool,
browser: browserTool,
computer: computerTool,
edit: editTool,
write: writeTool,
grep: grepTool,
@ -115,6 +131,7 @@ export const defaultCodingToolNames: ToolName[] = [
"read",
"bash",
"browser",
"computer",
"edit",
"write",
];
@ -131,19 +148,16 @@ export interface ToolsOptions {
bash?: BashToolOptions;
/** Options for the browser tool */
browser?: BrowserToolOptions;
/** Options for the computer tool */
computer?: ComputerToolOptions;
}
/**
* Create coding tools configured for a specific working directory.
*/
export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
return [
createReadTool(cwd, options?.read),
createBashTool(cwd, options?.bash),
createBrowserTool(cwd, options?.browser),
createEditTool(cwd),
createWriteTool(cwd),
];
const tools = createAllTools(cwd, options);
return defaultCodingToolNames.map((toolName) => tools[toolName]);
}
/**
@ -172,6 +186,7 @@ export function createAllTools(
read: createReadTool(cwd, options?.read),
bash: createBashTool(cwd, options?.bash),
browser: createBrowserTool(cwd, options?.browser),
computer: createComputerTool(cwd, options?.computer),
edit: createEditTool(cwd),
write: createWriteTool(cwd),
grep: createGrepTool(cwd),

View file

@ -182,6 +182,7 @@ export {
createAgentSession,
createBashTool,
createBrowserTool,
createComputerTool,
// Tool factories (for custom cwd)
createCodingTools,
createEditTool,
@ -253,6 +254,13 @@ export {
type BrowserToolInput,
type BrowserToolOptions,
browserTool,
type ComputerObservationMode,
type ComputerOperations,
type ComputerToolAction,
type ComputerToolDetails,
type ComputerToolInput,
type ComputerToolOptions,
computerTool,
codingTools,
defaultCodingToolNames,
DEFAULT_MAX_BYTES,

View file

@ -0,0 +1,339 @@
import { spawnSync } from "node:child_process";
import {
chmodSync,
existsSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { parseArgs } from "../src/cli/args.js";
import { buildSystemPrompt } from "../src/core/system-prompt.js";
import {
type ComputerOperations,
type ComputerToolDetails,
createAllTools,
createComputerTool,
defaultCodingToolNames,
} from "../src/core/tools/index.js";
/** A text content block as returned by a tool. */
interface TextBlock {
type: "text";
text: string;
}
/** Any content block; only text blocks are inspected by these tests. */
type ToolContentBlock = TextBlock | { type: string };
/** Minimal structural view of a tool result used for assertions. */
interface ToolResultLike {
content: ToolContentBlock[];
details?: unknown;
}
/** One recorded invocation of the mocked `ComputerOperations.exec`. */
interface ComputerExecCall {
command: string;
args: string[];
cwd: string;
env: NodeJS.ProcessEnv;
timeout?: number;
}
/** Concatenate the text of every `text` block in `result`, one per line. */
function getTextOutput(result: ToolResultLike): string {
  const texts: string[] = [];
  for (const block of result.content) {
    if (block.type === "text") {
      texts.push((block as TextBlock).text);
    }
  }
  return texts.join("\n");
}
/**
 * Build a fake `ComputerOperations` that never spawns a process.
 *
 * Every `exec` call is recorded in `calls`. When `output` is non-empty it is
 * delivered once through `onData` as a UTF-8 buffer, and the call resolves
 * with the configured `exitCode`.
 */
function createMockComputerOperations(
  output = "",
  exitCode: number | null = 0,
): {
  calls: ComputerExecCall[];
  operations: ComputerOperations;
} {
  const calls: ComputerExecCall[] = [];

  const operations: ComputerOperations = {
    exec: async (command, args, options) => {
      const recorded: ComputerExecCall = {
        command,
        args,
        cwd: options.cwd,
        env: options.env,
        timeout: options.timeout,
      };
      calls.push(recorded);

      if (output.length > 0) {
        options.onData(Buffer.from(output, "utf-8"));
      }
      return { exitCode };
    },
  };

  return { calls, operations };
}
/**
 * Absolute path of the `agent-computer.js` helper script, located relative to
 * the current working directory (four levels up, under `docker/companion`).
 */
function getAgentComputerScriptPath(): string {
  const relativeHelperPath = "../../../../docker/companion/agent-computer.js";
  return resolve(process.cwd(), relativeHelperPath);
}
// Test suite for the computer tool. The first group of tests drives
// createComputerTool through a mocked process runner; the later tests run the
// agent-computer helper script itself with the current Node executable.
describe("computer tool", () => {
// Temporary directories created by a test; removed again in afterEach.
const tempDirs: string[] = [];
afterEach(() => {
while (tempDirs.length > 0) {
const tempDir = tempDirs.pop();
if (tempDir) {
rmSync(tempDir, { recursive: true, force: true });
}
}
});
// Create a unique temp directory and register it for cleanup.
function createTempDir(prefix: string): string {
const tempDir = mkdtempSync(join(tmpdir(), prefix));
tempDirs.push(tempDir);
return tempDir;
}
// The tool must pass --state-dir/--input to the helper, apply the default
// 90-second timeout, and surface snapshot metadata from the JSON reply.
it("observes the desktop through the agent-computer helper", async () => {
const cwd = createTempDir("coding-agent-computer-observe-");
const stateDir = join(cwd, "computer-state");
const { calls, operations } = createMockComputerOperations(
JSON.stringify({
ok: true,
action: "observe",
summary: "Captured desktop snapshot snap-1",
snapshot: {
snapshotId: "snap-1",
screenshotPath: "/tmp/snap-1.png",
backend: "hybrid",
activeWindow: null,
windows: [],
refs: [],
},
}),
);
const computerTool = createComputerTool(cwd, {
operations,
command: "agent-computer-test",
stateDir,
});
const result = (await computerTool.execute("computer-observe", {
action: "observe",
})) as ToolResultLike;
expect(calls).toHaveLength(1);
expect(calls[0]).toMatchObject({
command: "agent-computer-test",
args: ["--state-dir", stateDir, "--input", '{"action":"observe"}'],
cwd,
timeout: 90,
});
const details = result.details as ComputerToolDetails | undefined;
expect(details?.stateDir).toBe(stateDir);
expect(details?.snapshotId).toBe("snap-1");
expect(details?.screenshotPath).toBe("/tmp/snap-1.png");
expect(getTextOutput(result)).toContain('"snapshotId": "snap-1"');
});
// Input validation happens before exec: no call may be recorded.
it("validates click targets before spawning the helper", async () => {
const cwd = createTempDir("coding-agent-computer-click-");
const stateDir = join(cwd, "computer-state");
const { calls, operations } = createMockComputerOperations();
const computerTool = createComputerTool(cwd, {
operations,
stateDir,
});
await expect(
computerTool.execute("computer-click-missing-target", {
action: "click",
}),
).rejects.toThrow(
"computer click requires snapshotId and ref, or explicit x and y coordinates",
);
expect(calls).toHaveLength(0);
});
// A snapshotId containing path-traversal characters is rejected by the tool.
it("rejects unsafe snapshot ids before spawning the helper", async () => {
const cwd = createTempDir("coding-agent-computer-snapshot-id-");
const stateDir = join(cwd, "computer-state");
const { calls, operations } = createMockComputerOperations();
const computerTool = createComputerTool(cwd, {
operations,
stateDir,
});
await expect(
computerTool.execute("computer-click-invalid-snapshot", {
action: "click",
snapshotId: "../../auth",
ref: "w1",
}),
).rejects.toThrow('Invalid computer snapshotId: "../../auth"');
expect(calls).toHaveLength(0);
});
// Wiring checks: CLI parsing, default tool list, and the tool registry.
it("accepts computer in --tools and exposes it in built-in tool wiring", () => {
const parsed = parseArgs(["--tools", "computer,read"]);
expect(parsed.tools).toEqual(["computer", "read"]);
expect(defaultCodingToolNames).toContain("computer");
expect(createAllTools(process.cwd()).computer.name).toBe("computer");
});
// The default prompt must list the tool and both computer-related guidelines.
it("mentions computer in the default system prompt", () => {
const prompt = buildSystemPrompt();
expect(prompt).toContain(
"- computer: Use the desktop computer: observe the screen",
);
expect(prompt).toContain(
"Computer: observe before interacting. Use it for native UI",
);
expect(prompt).toContain(
"Prefer browser for websites and DOM-aware tasks. Switch to computer",
);
});
// From here on the real helper script is executed as a child process.
it("rejects accessibility observe mode until a non-screenshot backend exists", () => {
const stateDir = createTempDir(
"coding-agent-computer-helper-accessibility-",
);
const result = spawnSync(
process.execPath,
[
"--no-warnings",
getAgentComputerScriptPath(),
"--state-dir",
stateDir,
"--input",
JSON.stringify({
action: "observe",
mode: "accessibility",
}),
],
{
encoding: "utf8",
},
);
expect(result.status).not.toBe(0);
expect(result.stderr).toContain(
"backend_unavailable: accessibility observe mode is not implemented",
);
});
// Shell-injection guard: the marker file must not be created by the app name.
it("refuses to shell out when app_open cannot match an installed app", () => {
const stateDir = createTempDir("coding-agent-computer-helper-app-open-");
const markerPath = join(stateDir, "should-not-exist");
const result = spawnSync(
process.execPath,
[
"--no-warnings",
getAgentComputerScriptPath(),
"--state-dir",
stateDir,
"--input",
JSON.stringify({
action: "app_open",
app: `definitely-not-an-installed-app && touch ${markerPath}`,
}),
],
{
encoding: "utf8",
},
);
expect(result.status).not.toBe(0);
expect(result.stderr).toContain("app_not_found:");
expect(existsSync(markerPath)).toBe(false);
});
// The helper performs its own snapshotId check, independent of the tool's.
it("rejects snapshot path traversal inside the helper", () => {
const stateDir = createTempDir("coding-agent-computer-helper-snapshot-id-");
const result = spawnSync(
process.execPath,
[
"--no-warnings",
getAgentComputerScriptPath(),
"--state-dir",
stateDir,
"--input",
JSON.stringify({
action: "click",
snapshotId: "../../auth",
ref: "w1",
}),
],
{
encoding: "utf8",
},
);
expect(result.status).not.toBe(0);
expect(result.stderr).toContain("invalid_snapshot_id: ../../auth");
});
// A stub xdotool placed first on PATH records its argv, proving that text
// which looks like an option ("--delay") is passed after the "--" separator.
it("passes typed text after the xdotool option separator", () => {
const stateDir = createTempDir("coding-agent-computer-helper-type-");
const binDir = createTempDir("coding-agent-computer-helper-bin-");
const argsPath = join(stateDir, "xdotool-args.json");
const xdotoolPath = join(binDir, "xdotool");
writeFileSync(
xdotoolPath,
`#!/usr/bin/env node
const { writeFileSync } = require("node:fs");
writeFileSync(process.env.TEST_XDOTOOL_ARGS_PATH, JSON.stringify(process.argv.slice(2)));
`,
"utf8",
);
chmodSync(xdotoolPath, 0o755);
const result = spawnSync(
process.execPath,
[
"--no-warnings",
getAgentComputerScriptPath(),
"--state-dir",
stateDir,
"--input",
JSON.stringify({
action: "type",
text: "--delay",
}),
],
{
encoding: "utf8",
env: {
...process.env,
PATH: `${binDir}:${process.env.PATH ?? ""}`,
TEST_XDOTOOL_ARGS_PATH: argsPath,
},
},
);
expect(result.status).toBe(0);
expect(JSON.parse(readFileSync(argsPath, "utf8"))).toEqual([
"type",
"--delay",
"12",
"--clearmodifiers",
"--",
"--delay",
]);
});
});

View file

@ -4,11 +4,10 @@ import { TmuxAdapter } from "./tmux-adapter";
describe("TmuxAdapter", () => {
let adapter: TmuxAdapter;
let mockExecCommand: ReturnType<typeof vi.spyOn>;
beforeEach(() => {
adapter = new TmuxAdapter();
mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
vi.spyOn(terminalAdapter, "execCommand");
delete process.env.TMUX;
delete process.env.ZELLIJ;
delete process.env.WEZTERM_PANE;
@ -21,6 +20,7 @@ describe("TmuxAdapter", () => {
});
it("detects tmux in headless runtimes when the binary is available", () => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockReturnValue({
stdout: "tmux 3.4",
stderr: "",
@ -33,6 +33,7 @@ describe("TmuxAdapter", () => {
it("does not detect tmux in GUI terminals just because the binary exists", () => {
process.env.COLORTERM = "truecolor";
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockReturnValue({
stdout: "tmux 3.4",
stderr: "",
@ -44,7 +45,8 @@ describe("TmuxAdapter", () => {
});
it("creates a detached team session when not already inside tmux", () => {
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockImplementation((_bin, args) => {
if (args[0] === "has-session") {
return { stdout: "", stderr: "missing", status: 1 };
}
@ -65,12 +67,18 @@ describe("TmuxAdapter", () => {
expect(mockExecCommand).toHaveBeenCalledWith(
"tmux",
expect.arrayContaining(["new-session", "-d", "-s", "companion-teams-demo"]),
expect.arrayContaining([
"new-session",
"-d",
"-s",
"companion-teams-demo",
]),
);
});
it("splits an existing detached session when not already inside tmux", () => {
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockImplementation((_bin, args) => {
if (args[0] === "has-session") {
return { stdout: "", stderr: "", status: 0 };
}
@ -96,6 +104,7 @@ describe("TmuxAdapter", () => {
});
it("checks pane liveness by pane id", () => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockReturnValue({
stdout: "%1\n%7\n",
stderr: "",

View file

@ -8,11 +8,10 @@ import { WezTermAdapter } from "./wezterm-adapter";
describe("WezTermAdapter", () => {
let adapter: WezTermAdapter;
let mockExecCommand: ReturnType<typeof vi.spyOn>;
beforeEach(() => {
adapter = new WezTermAdapter();
mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
vi.spyOn(terminalAdapter, "execCommand");
delete process.env.WEZTERM_PANE;
delete process.env.TMUX;
delete process.env.ZELLIJ;
@ -31,6 +30,7 @@ describe("WezTermAdapter", () => {
describe("detect", () => {
it("should detect when WEZTERM_PANE is set", () => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockReturnValue({
stdout: "version 1.0",
stderr: "",
@ -43,7 +43,8 @@ describe("WezTermAdapter", () => {
describe("spawn", () => {
it("should spawn first pane to the right with 50%", () => {
// Mock getPanes finding only current pane
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockImplementation((_bin, args) => {
if (args.includes("list")) {
return {
stdout: JSON.stringify([{ pane_id: 0, tab_id: 0 }]),
@ -79,7 +80,8 @@ describe("WezTermAdapter", () => {
it("should spawn subsequent panes by splitting the sidebar", () => {
// Mock getPanes finding current pane (0) and sidebar pane (1)
mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
mockExecCommand.mockImplementation((_bin, args) => {
if (args.includes("list")) {
return {
stdout: JSON.stringify([