computer use tool

2026-04-15 05:02:07 +00:00 · 2026-03-11 14:13:03 -04:00 · 2026-03-11 14:13:03 -04:00 · e1bba1c1a5
commit e1bba1c1a5
parent 3919bbf708
9 changed files with 911 additions and 17 deletions
--- a/packages/coding-agent/src/cli/args.ts
+++ b/packages/coding-agent/src/cli/args.ts
@ -325,6 +325,7 @@ ${chalk.bold("Environment Variables:")}
  AWS_REGION                       - AWS region for Amazon Bedrock (e.g., us-east-1)
  ${ENV_AGENT_DIR.padEnd(32)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent)
  COMPANION_PACKAGE_DIR                   - Override package directory (for Nix/Guix store paths)
  COMPANION_AGENT_COMPUTER_COMMAND        - Override the computer helper command (default: agent-computer)
  COMPANION_OFFLINE                       - Disable startup network operations when set to 1/true/yes
  COMPANION_SHARE_VIEWER_URL              - Base URL for /share command (default: https://companion.dev/session/)
  COMPANION_AI_ANTIGRAVITY_VERSION        - Override Antigravity User-Agent version (e.g., 1.23.0)
@ -333,6 +334,7 @@ ${chalk.bold(`Available Tools (default: ${defaultToolsText}):`)}
  read   - Read file contents
  bash   - Execute bash commands
  browser - Browser automation with persistent state
  computer - Desktop computer automation with screen observation and native UI control
  edit   - Edit files with find/replace
  write  - Write files (creates/overwrites)
  grep   - Search file contents (read-only, off by default)
--- a/packages/coding-agent/src/core/sdk.ts
+++ b/packages/coding-agent/src/core/sdk.ts
@ -26,10 +26,12 @@ import {
  allTools,
  bashTool,
  browserTool,
  computerTool,
  codingTools,
  defaultCodingToolNames,
  createBashTool,
  createBrowserTool,
  createComputerTool,
  createCodingTools,
  createEditTool,
  createFindTool,
@ -67,7 +69,7 @@ export interface CreateAgentSessionOptions {
  /** Models available for cycling (Ctrl+P in interactive mode) */
  scopedModels?: Array<{ model: Model<any>; thinkingLevel?: ThinkingLevel }>;
-  /** Built-in tools to use. Default: codingTools [read, bash, browser, edit, write] */
+  /** Built-in tools to use. Default: codingTools [read, bash, browser, computer, edit, write] */
  tools?: Tool[];
  /** Custom tools to register (in addition to built-in tools). */
  customTools?: ToolDefinition[];
@ -113,6 +115,7 @@ export {
  readTool,
  bashTool,
  browserTool,
  computerTool,
  editTool,
  writeTool,
  grepTool,
@ -127,6 +130,7 @@ export {
  createReadTool,
  createBashTool,
  createBrowserTool,
  createComputerTool,
  createEditTool,
  createWriteTool,
  createGrepTool,
--- a/packages/coding-agent/src/core/system-prompt.ts
+++ b/packages/coding-agent/src/core/system-prompt.ts
@ -11,6 +11,8 @@ const toolDescriptions: Record<string, string> = {
  bash: "Run shell commands",
  browser:
    "Browse the web: open, snapshot, click, fill, wait, screenshot, save/load state",
  computer:
    "Use the desktop computer: observe the screen, click, type, send hotkeys, manage apps/windows, wait for native UI, and read/write the clipboard",
  edit: "Surgical file edits (find exact text, replace it)",
  write: "Create new files or completely rewrite existing ones",
  grep: "Search file contents by regex (respects .gitignore)",
@ -167,6 +169,7 @@ export function buildSystemPrompt(
  const hasBash = tools.includes("bash");
  const hasBrowser = tools.includes("browser");
  const hasComputer = tools.includes("computer");
  const hasEdit = tools.includes("edit");
  const hasWrite = tools.includes("write");
  const hasGrep = tools.includes("grep");
@ -215,6 +218,16 @@ export function buildSystemPrompt(
      "Browser: snapshot before interacting with elements. Use it for research and learning too, not just automation",
    );
  }
  if (hasComputer) {
    addGuideline(
      "Computer: observe before interacting. Use it for native UI, desktop apps, file pickers, downloads, and OS dialogs",
    );
  }
  if (hasBrowser && hasComputer) {
    addGuideline(
      "Prefer browser for websites and DOM-aware tasks. Switch to computer when native UI or desktop state matters",
    );
  }
  // Output hygiene
  if (hasEdit || hasWrite) {
--- a/packages/coding-agent/src/core/tools/computer.ts
+++ b/packages/coding-agent/src/core/tools/computer.ts
@ -0,0 +1,666 @@
 import { spawn } from "node:child_process";
 import { mkdirSync } from "node:fs";
 import { join, resolve } from "node:path";
 import type { AgentTool } from "@mariozechner/companion-agent-core";
 import { type Static, Type } from "@sinclair/typebox";
 import { getAgentDir } from "../../config.js";
 import {
  getShellEnv,
  killProcessTree,
  sanitizeBinaryOutput,
 } from "../../utils/shell.js";
 // Every action name accepted by the computer tool. The tuple is `as const` so the
 // schema's `action` union and the ComputerToolAction type can be derived from it.
 const computerActions = [
  "observe",
  "click",
  "type",
  "hotkey",
  "scroll",
  "drag",
  "wait",
  "app_list",
  "app_open",
  "app_focus",
  "window_list",
  "window_focus",
  "window_move",
  "window_resize",
  "window_close",
  "clipboard_read",
  "clipboard_write",
 ] as const;
 // Values accepted by the optional `mode` parameter.
 const computerObservationModes = ["hybrid", "ocr", "accessibility"] as const;
 // Helper executable to spawn. `||` (not `??`) means an empty
 // COMPANION_AGENT_COMPUTER_COMMAND also falls back to "agent-computer".
 const DEFAULT_COMPUTER_COMMAND =
  process.env.COMPANION_AGENT_COMPUTER_COMMAND || "agent-computer";
 // Default per-invocation timeout, in seconds, handed to the exec operation.
 const DEFAULT_COMPUTER_TIMEOUT_SECONDS = 90;
 // TypeBox schema for the computer tool's parameters. Only `action` is required;
 // every other field is optional here, and the per-action requirements are
 // enforced separately by validateComputerInput before the helper is spawned.
 const computerSchema = Type.Object({
  action: Type.Union(
    computerActions.map((action) => Type.Literal(action)),
    { description: "Computer action to execute" },
  ),
  snapshotId: Type.Optional(
    Type.String({ description: "Snapshot ID returned from observe" }),
  ),
  ref: Type.Optional(
    Type.String({
      description:
        "Target ref from observe output, such as w1 for a window or t3 for OCR text",
    }),
  ),
  x: Type.Optional(Type.Number({ description: "Target x coordinate" })),
  y: Type.Optional(Type.Number({ description: "Target y coordinate" })),
  toRef: Type.Optional(
    Type.String({ description: "Destination ref for drag actions" }),
  ),
  toX: Type.Optional(
    Type.Number({ description: "Destination x coordinate for drag actions" }),
  ),
  toY: Type.Optional(
    Type.Number({ description: "Destination y coordinate for drag actions" }),
  ),
  text: Type.Optional(
    Type.String({
      description:
        "Text to type, text to wait for, or clipboard contents depending on action",
    }),
  ),
  keys: Type.Optional(
    Type.Array(Type.String(), {
      description: "Hotkey chord or key sequence, for example ['ctrl', 'l']",
      minItems: 1,
    }),
  ),
  app: Type.Optional(
    Type.String({
      description:
        "Installed app or running app name/class for app_open, app_focus, and wait",
    }),
  ),
  windowId: Type.Optional(
    Type.String({ description: "Window ID, such as 0x04200007" }),
  ),
  windowTitle: Type.Optional(
    Type.String({ description: "Window title substring to match" }),
  ),
  mode: Type.Optional(
    Type.Union(
      computerObservationModes.map((mode) => Type.Literal(mode)),
      { description: "Observation mode. Defaults to hybrid." },
    ),
  ),
  amount: Type.Optional(
    Type.Number({
      description:
        "Scroll amount in wheel steps. Positive scrolls down/right, negative scrolls up/left.",
    }),
  ),
  width: Type.Optional(
    Type.Number({ description: "Target window width for resize actions" }),
  ),
  height: Type.Optional(
    Type.Number({ description: "Target window height for resize actions" }),
  ),
  clear: Type.Optional(
    Type.Boolean({
      description: "Clear the active input field before typing",
    }),
  ),
  button: Type.Optional(
    Type.Number({
      description: "Mouse button for click or drag. Defaults to 1.",
      minimum: 1,
      maximum: 7,
    }),
  ),
  timeoutMs: Type.Optional(
    Type.Number({
      description: "Wait timeout in milliseconds for observe-derived waits",
      minimum: 0,
    }),
  ),
  intervalMs: Type.Optional(
    Type.Number({
      description: "Polling interval for wait actions in milliseconds",
      minimum: 10,
    }),
  ),
 });
 /** Union of the action names listed in `computerActions`. */
 export type ComputerToolAction = (typeof computerActions)[number];
 /** Union of the observation modes listed in `computerObservationModes`. */
 export type ComputerObservationMode = (typeof computerObservationModes)[number];
 /** Static TypeScript shape of the tool parameters, derived from `computerSchema`. */
 export type ComputerToolInput = Static<typeof computerSchema>;
 /** Structured details attached to computer tool progress updates and results. */
 export interface ComputerToolDetails {
  /** Action that was requested. */
  action: ComputerToolAction;
  /** Helper executable that was invoked. */
  command: string;
  /** Arguments passed to the helper executable. */
  args: string[];
  /** Absolute state directory passed to the helper via `--state-dir`. */
  stateDir: string;
  /** Snapshot ID parsed from the helper's output, when it reported one. */
  snapshotId?: string;
  /** Screenshot path parsed from the helper's output, when it reported one. */
  screenshotPath?: string;
 }
 /**
  * Pluggable process-execution backend for the computer tool, so callers
  * (for example tests) can replace the real child-process spawn.
  */
 export interface ComputerOperations {
  /**
   * Run `command` with `args` and settle once it finishes.
   *
   * - `onData` receives output chunks as they arrive (the default
   *   implementation forwards both stdout and stderr to it).
   * - `signal` lets the caller cancel the run.
   * - `timeout` is expressed in seconds in the default implementation.
   *
   * Resolves with the exit code reported for the process, which may be `null`.
   */
  exec: (
    command: string,
    args: string[],
    options: {
      cwd: string;
      env: NodeJS.ProcessEnv;
      onData: (data: Buffer) => void;
      signal?: AbortSignal;
      timeout?: number;
    },
  ) => Promise<{ exitCode: number | null }>;
 }
 /**
  * Default {@link ComputerOperations} implementation backed by `child_process.spawn`.
  *
  * The helper runs detached with stdin ignored; stdout and stderr chunks are both
  * forwarded to `onData`. The returned promise:
  * - resolves with `{ exitCode }` when the process closes normally,
  * - rejects with `Error("aborted")` when `signal` is (or becomes) aborted,
  * - rejects with `Error("timeout:<seconds>")` when `timeout` seconds elapse,
  * - rejects with the spawn error (e.g. `ENOENT`) when the process cannot start.
  */
 const defaultComputerOperations: ComputerOperations = {
  exec: (command, args, { cwd, env, onData, signal, timeout }) => {
    return new Promise((resolvePromise, rejectPromise) => {
      // A call cancelled before it started must not launch the helper at all:
      // spawning and then killing it races with the helper already acting on
      // the desktop (clicking, typing, closing windows).
      if (signal?.aborted) {
        rejectPromise(new Error("aborted"));
        return;
      }
      const child = spawn(command, args, {
        cwd,
        detached: true,
        env,
        stdio: ["ignore", "pipe", "pipe"],
      });
      // child.pid is undefined when the spawn itself failed; nothing to kill then.
      const killChild = () => {
        if (child.pid) {
          killProcessTree(child.pid);
        }
      };
      let timedOut = false;
      let timeoutHandle: NodeJS.Timeout | undefined;
      if (timeout !== undefined && timeout > 0) {
        // `timeout` is in seconds; setTimeout expects milliseconds.
        timeoutHandle = setTimeout(() => {
          timedOut = true;
          killChild();
        }, timeout * 1000);
      }
      child.stdout?.on("data", onData);
      child.stderr?.on("data", onData);
      signal?.addEventListener("abort", killChild, { once: true });
      // Release the timer and abort listener on every terminal event.
      const cleanup = () => {
        if (timeoutHandle) clearTimeout(timeoutHandle);
        signal?.removeEventListener("abort", killChild);
      };
      child.on("error", (error) => {
        cleanup();
        rejectPromise(error);
      });
      child.on("close", (code) => {
        cleanup();
        // Abort takes precedence over timeout when both happened.
        if (signal?.aborted) {
          rejectPromise(new Error("aborted"));
          return;
        }
        if (timedOut) {
          rejectPromise(new Error(`timeout:${timeout}`));
          return;
        }
        resolvePromise({ exitCode: code });
      });
    });
  },
 };
 /** Options accepted by `createComputerTool`. All fields are optional. */
 export interface ComputerToolOptions {
  /** Replacement process-execution backend; defaults to spawning a real child process. */
  operations?: ComputerOperations;
  /** Helper executable to run; defaults to DEFAULT_COMPUTER_COMMAND. */
  command?: string;
  /** Timeout in seconds passed to `operations.exec`; defaults to 90. */
  defaultTimeoutSeconds?: number;
  /** State directory for the helper; relative paths are resolved against the tool's cwd. */
  stateDir?: string;
  /** Base agent directory; when `stateDir` is unset the state lives in its `computer` subdirectory. */
  agentDir?: string;
 }
 /** Everything `execute` needs to run one validated computer action. */
 interface ComputerCommandContext {
  /** Action being executed. */
  action: ComputerToolAction;
  /** Command-line arguments for the helper (`--state-dir` and `--input`). */
  args: string[];
  /** Progress text emitted before the helper runs. */
  statusMessage: string;
  /** Fallback result text used when the helper prints nothing. */
  successMessage: string;
  /** Absolute state directory, already created on disk. */
  stateDir: string;
 }
 /** Turn `inputPath` into an absolute path, interpreting relative paths against `cwd`. */
 function resolveCommandPath(cwd: string, inputPath: string): string {
  const absolutePath = resolve(cwd, inputPath);
  return absolutePath;
 }
 /**
  * Directory that holds computer-tool state by default: the `computer`
  * subdirectory of `options.agentDir`, or of the configured agent dir when
  * no override is given.
  */
 function getComputerRootDir(options?: ComputerToolOptions): string {
  const overrideDir = options?.agentDir;
  const parentDir = overrideDir ?? getAgentDir();
  return join(parentDir, "computer");
 }
 /**
  * Absolute state directory for the helper: `options.stateDir` when provided,
  * otherwise the default computer root directory, resolved against `cwd`.
  */
 function getComputerStateDir(
  cwd: string,
  options?: ComputerToolOptions,
 ): string {
  const configuredDir = options?.stateDir;
  const targetDir = configuredDir ?? getComputerRootDir(options);
  return resolveCommandPath(cwd, targetDir);
 }
 /** Create `stateDir` (and any missing parents) if it does not exist yet. */
 function ensureComputerDir(stateDir: string): void {
  const creationOptions = { recursive: true };
  mkdirSync(stateDir, creationOptions);
 }
 /** Join the captured output chunks, decode them as UTF-8, sanitize, and trim. */
 function normalizeOutput(chunks: Buffer[]): string {
  const combined = Buffer.concat(chunks);
  const decoded = combined.toString("utf-8");
  const sanitized = sanitizeBinaryOutput(decoded);
  return sanitized.trim();
 }
 /** True when both `x` and `y` are supplied (0 counts as supplied). */
 function hasCoordinateTarget(input: ComputerToolInput): boolean {
  const missingAxis = input.x === undefined || input.y === undefined;
  return !missingAxis;
 }
 /** True when the input names a ref together with the snapshot it came from. */
 function hasRefTarget(input: ComputerToolInput): boolean {
  const { snapshotId, ref } = input;
  if (snapshotId === undefined) {
    return false;
  }
  return ref !== undefined;
 }
 /** True when the input identifies a window by ID or by title. */
 function hasWindowTarget(input: ComputerToolInput): boolean {
  const noWindowGiven =
    input.windowId === undefined && input.windowTitle === undefined;
  return !noWindowGiven;
 }
 /** True when a drag destination is given as `toRef` or as a full `toX`/`toY` pair. */
 function hasDragDestination(input: ComputerToolInput): boolean {
  if (input.toRef !== undefined) {
    return true;
  }
  return input.toX !== undefined && input.toY !== undefined;
 }
 /**
  * Check the targeting rules for `wait`: at most one of ref, text, app,
  * windowId, windowTitle may be set, and when none is set a `timeoutMs`
  * must be given instead.
  *
  * @throws Error describing which rule was violated.
  */
 function validateWaitInput(input: ComputerToolInput): void {
  const waitTargets = [
    input.ref,
    input.text,
    input.app,
    input.windowId,
    input.windowTitle,
  ];
  const suppliedTargets = waitTargets.filter(
    (candidate) => candidate !== undefined,
  ).length;
  if (suppliedTargets > 1) {
    throw new Error(
      "computer wait requires exactly one of ref, text, app, windowId, or windowTitle",
    );
  }
  const isPlainDelay = input.timeoutMs !== undefined;
  if (suppliedTargets === 0 && !isPlainDelay) {
    throw new Error(
      "computer wait requires one of ref, text, app, windowId, windowTitle, or timeoutMs",
    );
  }
 }
 /**
  * Enforce the per-action parameter requirements that the schema cannot express
  * (the schema marks everything except `action` as optional).
  *
  * Returns normally when the input is acceptable for its action.
  *
  * @throws Error with a message naming the action and the missing/invalid
  *   parameter. Checks run in the order written, so the first failing rule
  *   determines the message.
  */
 function validateComputerInput(input: ComputerToolInput): void {
  switch (input.action) {
    // These actions take no required parameters.
    case "observe":
    case "app_list":
    case "window_list":
    case "clipboard_read":
      return;
    case "click":
      if (!hasRefTarget(input) && !hasCoordinateTarget(input)) {
        throw new Error(
          "computer click requires snapshotId and ref, or explicit x and y coordinates",
        );
      }
      return;
    case "type":
      if (input.text === undefined) {
        throw new Error("computer type requires text");
      }
      // A ref is optional for typing, but is only meaningful with its snapshot.
      if (input.ref !== undefined && input.snapshotId === undefined) {
        throw new Error("computer type with ref requires snapshotId");
      }
      return;
    case "hotkey":
      if (!input.keys || input.keys.length === 0) {
        throw new Error("computer hotkey requires keys");
      }
      return;
    case "scroll":
      if (input.amount === undefined || input.amount === 0) {
        throw new Error("computer scroll requires a non-zero amount");
      }
      if (input.ref !== undefined && input.snapshotId === undefined) {
        throw new Error("computer scroll with ref requires snapshotId");
      }
      return;
    case "drag":
      // Start point first, then destination, then the snapshot needed for toRef.
      if (!hasRefTarget(input) && !hasCoordinateTarget(input)) {
        throw new Error(
          "computer drag requires a starting target via snapshotId and ref, or x and y coordinates",
        );
      }
      if (!hasDragDestination(input)) {
        throw new Error(
          "computer drag requires a destination via toRef, or explicit toX and toY coordinates",
        );
      }
      if (input.toRef !== undefined && input.snapshotId === undefined) {
        throw new Error("computer drag with toRef requires snapshotId");
      }
      return;
    case "wait":
      validateWaitInput(input);
      if (input.ref !== undefined && input.snapshotId === undefined) {
        throw new Error("computer wait with ref requires snapshotId");
      }
      return;
    case "app_open":
    case "app_focus":
      // Truthiness check: an empty app name is rejected as well as a missing one.
      if (!input.app) {
        throw new Error(`computer ${input.action} requires app`);
      }
      return;
    case "window_focus":
    case "window_close":
      if (!hasWindowTarget(input)) {
        throw new Error(
          `computer ${input.action} requires windowId or windowTitle`,
        );
      }
      return;
    case "window_move":
      if (!hasWindowTarget(input)) {
        throw new Error(
          "computer window_move requires windowId or windowTitle",
        );
      }
      if (input.x === undefined || input.y === undefined) {
        throw new Error("computer window_move requires x and y");
      }
      return;
    case "window_resize":
      if (!hasWindowTarget(input)) {
        throw new Error(
          "computer window_resize requires windowId or windowTitle",
        );
      }
      if (input.width === undefined || input.height === undefined) {
        throw new Error("computer window_resize requires width and height");
      }
      return;
    case "clipboard_write":
      if (input.text === undefined) {
        throw new Error("computer clipboard_write requires text");
      }
      return;
    default: {
      // Compile-time exhaustiveness check: adding an action without a case
      // above makes this assignment fail to type-check.
      const unsupportedAction: never = input.action;
      throw new Error(`Unsupported computer action: ${unsupportedAction}`);
    }
  }
 }
 /**
  * Pick the human-readable progress and success messages for an action.
  * `app_open` and `app_focus` interpolate the requested app name; all other
  * actions use fixed text.
  */
 function describeAction(input: ComputerToolInput): {
  statusMessage: string;
  successMessage: string;
 } {
  const messages = (statusMessage: string, successMessage: string) => ({
    statusMessage,
    successMessage,
  });
  const { action, app } = input;
  switch (action) {
    case "observe":
      return messages("Observing desktop...", "Captured desktop snapshot");
    case "click":
      return messages("Clicking desktop target...", "Clicked desktop target");
    case "type":
      return messages("Typing into desktop...", "Typed into desktop");
    case "hotkey":
      return messages("Sending hotkey...", "Sent hotkey");
    case "scroll":
      return messages("Scrolling desktop...", "Scrolled desktop");
    case "drag":
      return messages("Dragging desktop target...", "Dragged desktop target");
    case "wait":
      return messages(
        "Waiting for desktop state...",
        "Desktop wait condition satisfied",
      );
    case "app_list":
      return messages("Listing apps...", "Listed apps");
    case "app_open":
      return messages(`Opening app ${app}...`, `Opened app ${app}`);
    case "app_focus":
      return messages(`Focusing app ${app}...`, `Focused app ${app}`);
    case "window_list":
      return messages("Listing windows...", "Listed windows");
    case "window_focus":
      return messages("Focusing window...", "Focused window");
    case "window_move":
      return messages("Moving window...", "Moved window");
    case "window_resize":
      return messages("Resizing window...", "Resized window");
    case "window_close":
      return messages("Closing window...", "Closed window");
    case "clipboard_read":
      return messages("Reading clipboard...", "Read clipboard");
    case "clipboard_write":
      return messages("Writing clipboard...", "Wrote clipboard");
  }
 }
 /**
  * Validate `input`, make sure the state directory exists, and assemble the
  * helper invocation: `--state-dir <dir> --input <json of input>` plus the
  * status/success messages for the action.
  *
  * @throws Error when the input fails validation (before touching the disk).
  */
 function buildComputerCommand(
  cwd: string,
  input: ComputerToolInput,
  options?: ComputerToolOptions,
 ): ComputerCommandContext {
  validateComputerInput(input);
  const stateDir = getComputerStateDir(cwd, options);
  ensureComputerDir(stateDir);
  const { statusMessage, successMessage } = describeAction(input);
  const serializedInput = JSON.stringify(input);
  const args = ["--state-dir", stateDir, "--input", serializedInput];
  return {
    action: input.action,
    args,
    statusMessage,
    successMessage,
    stateDir,
  };
 }
 /**
  * Compose the error text for a failed helper run: the helper's own output
  * (if any) followed by a summary line that includes the exit code when one
  * is known.
  */
 function buildComputerErrorMessage(
  action: ComputerToolAction,
  output: string,
  exitCode: number | null,
 ): string {
  let summary: string;
  if (exitCode === null) {
    summary = `Computer action "${action}" failed`;
  } else {
    summary = `Computer action "${action}" exited with code ${exitCode}`;
  }
  if (output.length === 0) {
    return summary;
  }
  return `${output}\n\n${summary}`;
 }
 /** Multi-line guidance shown when the helper executable cannot be found. */
 function getMissingComputerCommandMessage(command: string): string {
  const notFound = `Computer tool could not find "${command}".`;
  const sandboxNote =
    "Desktop sandboxes install agent-computer alongside the browser tool.";
  const localNote =
    "If you are running locally, either install the helper or omit the computer tool.";
  const setupNote =
    "Recommended setup inside a sandbox image: copy agent-computer into /usr/local/bin and install xdotool, wmctrl, tesseract-ocr, and xclip.";
  return `${notFound}\n${sandboxNote}\n${localNote}\n${setupNote}`;
 }
 /** True for non-null, non-array objects, i.e. something with named JSON fields. */
 function isJsonRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 /** Return the first candidate that is a non-empty string, or undefined. */
 function firstNonEmptyString(...candidates: unknown[]): string | undefined {
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate.length > 0) {
      return candidate;
    }
  }
  return undefined;
 }
 /**
  * Interpret the helper's (already trimmed) output.
  *
  * - Empty output yields `{ text: "" }`.
  * - Output that is not valid JSON is returned verbatim as `text`.
  * - Valid JSON is pretty-printed into `text`; `snapshotId` and
  *   `screenshotPath` are taken from `payload.snapshot` first and from the
  *   top level second.
  *
  * The helper output is untrusted JSON, so the two fields are only returned
  * when they really are non-empty strings. Numbers, objects, or empty strings
  * are ignored instead of being passed on under a `string` type, and an
  * invalid nested value no longer hides a valid top-level one.
  */
 function parseComputerPayload(output: string): {
  text: string;
  snapshotId?: string;
  screenshotPath?: string;
 } {
  if (output.length === 0) {
    return { text: "" };
  }
  let payload: unknown;
  try {
    payload = JSON.parse(output);
  } catch {
    // Not JSON (plain text, or stderr noise mixed in): hand it back untouched.
    return { text: output };
  }
  const topLevel = isJsonRecord(payload) ? payload : {};
  const snapshot = isJsonRecord(topLevel.snapshot) ? topLevel.snapshot : {};
  return {
    text: JSON.stringify(payload, null, 2),
    snapshotId: firstNonEmptyString(snapshot.snapshotId, topLevel.snapshotId),
    screenshotPath: firstNonEmptyString(
      snapshot.screenshotPath,
      topLevel.screenshotPath,
    ),
  };
 }
 // Extra seconds granted beyond a caller-requested `timeoutMs`, so the helper can
 // report its own timeout instead of being killed at the same instant.
 const COMPUTER_TIMEOUT_GRACE_SECONDS = 5;
 // Node timers accept delays up to 2^31 - 1 ms; larger values fire immediately.
 const MAX_COMPUTER_TIMEOUT_SECONDS = Math.floor(2147483647 / 1000);
 /**
  * Choose the process-level timeout (seconds) for one helper invocation.
  * The default applies unless the caller asked for a longer wait via
  * `timeoutMs`, in which case the timeout is extended to cover it plus a grace
  * period. A non-positive default (timeouts disabled) is left untouched.
  */
 function resolveComputerExecTimeoutSeconds(
  defaultTimeoutSeconds: number,
  timeoutMs: number | undefined,
 ): number {
  if (defaultTimeoutSeconds <= 0) {
    return defaultTimeoutSeconds;
  }
  if (timeoutMs === undefined || !Number.isFinite(timeoutMs) || timeoutMs <= 0) {
    return defaultTimeoutSeconds;
  }
  const requestedSeconds = Math.min(
    Math.ceil(timeoutMs / 1000) + COMPUTER_TIMEOUT_GRACE_SECONDS,
    MAX_COMPUTER_TIMEOUT_SECONDS,
  );
  return Math.max(defaultTimeoutSeconds, requestedSeconds);
 }
 /**
  * Create the `computer` agent tool bound to a working directory.
  *
  * Each call validates the input, runs the helper executable as
  * `<command> --state-dir <dir> --input <json>`, and returns the helper's output
  * (pretty-printed when it is JSON) together with {@link ComputerToolDetails}.
  *
  * @param cwd Working directory for the helper; also the base for a relative `stateDir`.
  * @param options Optional overrides for the exec backend, command, timeout, and directories.
  * @returns The tool definition.
  * @throws (from `execute`) Error when validation fails, the helper is missing
  *   (`ENOENT`), the call is aborted, the call times out, or the helper exits
  *   with a non-zero / unknown exit code.
  */
 export function createComputerTool(
  cwd: string,
  options?: ComputerToolOptions,
 ): AgentTool<typeof computerSchema> {
  const operations = options?.operations ?? defaultComputerOperations;
  const command = options?.command ?? DEFAULT_COMPUTER_COMMAND;
  const defaultTimeoutSeconds =
    options?.defaultTimeoutSeconds ?? DEFAULT_COMPUTER_TIMEOUT_SECONDS;
  return {
    name: "computer",
    label: "computer",
    description:
      "Use the desktop computer when browser DOM control is not enough: observe the screen, interact with windows and apps, type, click, drag, scroll, wait for native UI changes, and read or write the clipboard.",
    parameters: computerSchema,
    execute: async (_toolCallId, input, signal, onUpdate) => {
      // Throws on invalid input before anything is spawned.
      const commandContext = buildComputerCommand(cwd, input, options);
      const details: ComputerToolDetails = {
        action: commandContext.action,
        command,
        args: commandContext.args,
        stateDir: commandContext.stateDir,
      };
      onUpdate?.({
        content: [{ type: "text", text: commandContext.statusMessage }],
        details,
      });
      const chunks: Buffer[] = [];
      try {
        const { exitCode } = await operations.exec(
          command,
          commandContext.args,
          {
            cwd,
            env: getShellEnv(),
            onData: (data) => chunks.push(data),
            signal,
            // Do not kill the helper before a longer caller-requested wait
            // (`timeoutMs`) has had a chance to complete.
            timeout: resolveComputerExecTimeoutSeconds(
              defaultTimeoutSeconds,
              input.timeoutMs,
            ),
          },
        );
        const output = normalizeOutput(chunks);
        // A null exit code is treated as failure as well.
        if (exitCode !== 0) {
          throw new Error(
            buildComputerErrorMessage(commandContext.action, output, exitCode),
          );
        }
        const parsed = parseComputerPayload(output);
        if (parsed.snapshotId) {
          details.snapshotId = parsed.snapshotId;
        }
        if (parsed.screenshotPath) {
          details.screenshotPath = parsed.screenshotPath;
        }
        return {
          content: [
            {
              type: "text",
              text:
                parsed.text.length > 0
                  ? parsed.text
                  : commandContext.successMessage,
            },
          ],
          details,
        };
      } catch (error) {
        // Translate the exec backend's low-level failures into user-facing
        // messages; anything unrecognized is rethrown unchanged.
        if (
          error instanceof Error &&
          "code" in error &&
          error.code === "ENOENT"
        ) {
          throw new Error(getMissingComputerCommandMessage(command));
        }
        if (error instanceof Error && error.message === "aborted") {
          throw new Error(`Computer action "${commandContext.action}" aborted`);
        }
        if (error instanceof Error && error.message.startsWith("timeout:")) {
          // The exec backend encodes the elapsed limit as "timeout:<seconds>".
          const seconds = error.message.split(":")[1];
          throw new Error(
            `Computer action "${commandContext.action}" timed out after ${seconds} seconds`,
          );
        }
        throw error;
      }
    },
  };
 }
 /** Ready-made computer tool bound to the process's working directory at module load, with default options. */
 export const computerTool = createComputerTool(process.cwd());
--- a/packages/coding-agent/src/core/tools/index.ts
+++ b/packages/coding-agent/src/core/tools/index.ts
@ -19,6 +19,16 @@ export {
  browserTool,
  createBrowserTool,
 } from "./browser.js";
 export {
  type ComputerObservationMode,
  type ComputerOperations,
  type ComputerToolAction,
  type ComputerToolDetails,
  type ComputerToolInput,
  type ComputerToolOptions,
  computerTool,
  createComputerTool,
 } from "./computer.js";
 export {
  createEditTool,
  type EditOperations,
@ -84,6 +94,11 @@ import {
  createBrowserTool,
  type BrowserToolOptions,
 } from "./browser.js";
 import {
  computerTool,
  createComputerTool,
  type ComputerToolOptions,
 } from "./computer.js";
 import { createEditTool, editTool } from "./edit.js";
 import { createFindTool, findTool } from "./find.js";
 import { createGrepTool, grepTool } from "./grep.js";
@ -102,6 +117,7 @@ export const allTools = {
  read: readTool,
  bash: bashTool,
  browser: browserTool,
  computer: computerTool,
  edit: editTool,
  write: writeTool,
  grep: grepTool,
@ -115,6 +131,7 @@ export const defaultCodingToolNames: ToolName[] = [
  "read",
  "bash",
  "browser",
  "computer",
  "edit",
  "write",
 ];
@ -131,19 +148,16 @@ export interface ToolsOptions {
  bash?: BashToolOptions;
  /** Options for the browser tool */
  browser?: BrowserToolOptions;
  /** Options for the computer tool */
  computer?: ComputerToolOptions;
 }
 /**
 * Create coding tools configured for a specific working directory.
 */
 export function createCodingTools(cwd: string, options?: ToolsOptions): Tool[] {
-  return [
+  const tools = createAllTools(cwd, options);
-    createReadTool(cwd, options?.read),
+  return defaultCodingToolNames.map((toolName) => tools[toolName]);
    createBashTool(cwd, options?.bash),
    createBrowserTool(cwd, options?.browser),
    createEditTool(cwd),
    createWriteTool(cwd),
  ];
 }
 /**
@ -172,6 +186,7 @@ export function createAllTools(
    read: createReadTool(cwd, options?.read),
    bash: createBashTool(cwd, options?.bash),
    browser: createBrowserTool(cwd, options?.browser),
    computer: createComputerTool(cwd, options?.computer),
    edit: createEditTool(cwd),
    write: createWriteTool(cwd),
    grep: createGrepTool(cwd),
--- a/packages/coding-agent/src/index.ts
+++ b/packages/coding-agent/src/index.ts
@ -182,6 +182,7 @@ export {
  createAgentSession,
  createBashTool,
  createBrowserTool,
  createComputerTool,
  // Tool factories (for custom cwd)
  createCodingTools,
  createEditTool,
@ -253,6 +254,13 @@ export {
  type BrowserToolInput,
  type BrowserToolOptions,
  browserTool,
  type ComputerObservationMode,
  type ComputerOperations,
  type ComputerToolAction,
  type ComputerToolDetails,
  type ComputerToolInput,
  type ComputerToolOptions,
  computerTool,
  codingTools,
  defaultCodingToolNames,
  DEFAULT_MAX_BYTES,
--- a/packages/coding-agent/test/computer-tool.test.ts
+++ b/packages/coding-agent/test/computer-tool.test.ts
@ -0,0 +1,175 @@
 import { mkdtempSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, expect, it } from "vitest";
 import { parseArgs } from "../src/cli/args.js";
 import { buildSystemPrompt } from "../src/core/system-prompt.js";
 import {
  type ComputerOperations,
  type ComputerToolDetails,
  createAllTools,
  createComputerTool,
  defaultCodingToolNames,
 } from "../src/core/tools/index.js";
 /** A text content block as it appears in a tool result. */
 interface TextBlock {
  type: "text";
  text: string;
 }
 /** Any content block: a text block, or some other block identified only by its `type`. */
 type ToolContentBlock = TextBlock | { type: string };
 /** Minimal structural view of a tool result, sufficient for these tests. */
 interface ToolResultLike {
  content: ToolContentBlock[];
  details?: unknown;
 }
 /** One recorded invocation of the mocked `exec` operation. */
 interface ComputerExecCall {
  command: string;
  args: string[];
  cwd: string;
  env: NodeJS.ProcessEnv;
  timeout?: number;
 }
 /** Concatenate the text of every text block in `result`, one block per line. */
 function getTextOutput(result: ToolResultLike): string {
  const isTextBlock = (block: ToolContentBlock): block is TextBlock =>
    block.type === "text";
  const texts: string[] = [];
  for (const block of result.content) {
    if (isTextBlock(block)) {
      texts.push(block.text);
    }
  }
  return texts.join("\n");
 }
 /**
  * Build a fake exec backend that records every invocation, optionally emits
  * `output` through `onData`, and resolves with the given `exitCode`.
  */
 function createMockComputerOperations(
  output = "",
  exitCode: number | null = 0,
 ): {
  calls: ComputerExecCall[];
  operations: ComputerOperations;
 } {
  const calls: ComputerExecCall[] = [];
  const exec: ComputerOperations["exec"] = async (command, args, options) => {
    const recorded: ComputerExecCall = {
      command,
      args,
      cwd: options.cwd,
      env: options.env,
      timeout: options.timeout,
    };
    calls.push(recorded);
    const hasOutput = output.length > 0;
    if (hasOutput) {
      options.onData(Buffer.from(output, "utf-8"));
    }
    return { exitCode };
  };
  const operations: ComputerOperations = { exec };
  return { calls, operations };
 }
 // End-to-end checks for the computer tool using a mocked exec backend, plus
 // wiring checks for CLI parsing, tool registration, and the system prompt.
 describe("computer tool", () => {
  // Temp directories created by a test; removed again after each test.
  const tempDirs: string[] = [];
  afterEach(() => {
    while (tempDirs.length > 0) {
      const tempDir = tempDirs.pop();
      if (tempDir) {
        rmSync(tempDir, { recursive: true, force: true });
      }
    }
  });
  // Create a unique temp directory and register it for cleanup.
  function createTempDir(prefix: string): string {
    const tempDir = mkdtempSync(join(tmpdir(), prefix));
    tempDirs.push(tempDir);
    return tempDir;
  }
  // Happy path: the helper is invoked with the expected arguments and its JSON
  // payload is surfaced both as text and as structured details.
  it("observes the desktop through the agent-computer helper", async () => {
    const cwd = createTempDir("coding-agent-computer-observe-");
    const stateDir = join(cwd, "computer-state");
    const { calls, operations } = createMockComputerOperations(
      JSON.stringify({
        ok: true,
        action: "observe",
        summary: "Captured desktop snapshot snap-1",
        snapshot: {
          snapshotId: "snap-1",
          screenshotPath: "/tmp/snap-1.png",
          backend: "hybrid",
          activeWindow: null,
          windows: [],
          refs: [],
        },
      }),
    );
    const computerTool = createComputerTool(cwd, {
      operations,
      command: "agent-computer-test",
      stateDir,
    });
    const result = (await computerTool.execute("computer-observe", {
      action: "observe",
    })) as ToolResultLike;
    expect(calls).toHaveLength(1);
    // timeout: 90 is the tool's default timeout in seconds.
    expect(calls[0]).toMatchObject({
      command: "agent-computer-test",
      args: ["--state-dir", stateDir, "--input", '{"action":"observe"}'],
      cwd,
      timeout: 90,
    });
    const details = result.details as ComputerToolDetails | undefined;
    expect(details?.stateDir).toBe(stateDir);
    expect(details?.snapshotId).toBe("snap-1");
    expect(details?.screenshotPath).toBe("/tmp/snap-1.png");
    expect(getTextOutput(result)).toContain('"snapshotId": "snap-1"');
  });
  // Validation failures must reject without ever calling the exec backend.
  it("validates click targets before spawning the helper", async () => {
    const cwd = createTempDir("coding-agent-computer-click-");
    const stateDir = join(cwd, "computer-state");
    const { calls, operations } = createMockComputerOperations();
    const computerTool = createComputerTool(cwd, {
      operations,
      stateDir,
    });
    await expect(
      computerTool.execute("computer-click-missing-target", {
        action: "click",
      }),
    ).rejects.toThrow(
      "computer click requires snapshotId and ref, or explicit x and y coordinates",
    );
    expect(calls).toHaveLength(0);
  });
  // The tool name is accepted by the CLI and present in the built-in registries.
  it("accepts computer in --tools and exposes it in built-in tool wiring", () => {
    const parsed = parseArgs(["--tools", "computer,read"]);
    expect(parsed.tools).toEqual(["computer", "read"]);
    expect(defaultCodingToolNames).toContain("computer");
    expect(createAllTools(process.cwd()).computer.name).toBe("computer");
  });
  // The default prompt lists the tool and includes both computer guidelines.
  it("mentions computer in the default system prompt", () => {
    const prompt = buildSystemPrompt();
    expect(prompt).toContain(
      "- computer: Use the desktop computer: observe the screen",
    );
    expect(prompt).toContain(
      "Computer: observe before interacting. Use it for native UI",
    );
    expect(prompt).toContain(
      "Prefer browser for websites and DOM-aware tasks. Switch to computer",
    );
  });
 });
--- a/packages/companion-teams/src/adapters/tmux-adapter.test.ts
+++ b/packages/companion-teams/src/adapters/tmux-adapter.test.ts
@ -4,11 +4,10 @@ import { TmuxAdapter } from "./tmux-adapter";
 describe("TmuxAdapter", () => {
  let adapter: TmuxAdapter;
  let mockExecCommand: ReturnType<typeof vi.spyOn>;
  beforeEach(() => {
    adapter = new TmuxAdapter();
-    mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
+    vi.spyOn(terminalAdapter, "execCommand");
    delete process.env.TMUX;
    delete process.env.ZELLIJ;
    delete process.env.WEZTERM_PANE;
@ -21,6 +20,7 @@ describe("TmuxAdapter", () => {
  });
  it("detects tmux in headless runtimes when the binary is available", () => {
    const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
    mockExecCommand.mockReturnValue({
      stdout: "tmux 3.4",
      stderr: "",
@ -33,6 +33,7 @@ describe("TmuxAdapter", () => {
  it("does not detect tmux in GUI terminals just because the binary exists", () => {
    process.env.COLORTERM = "truecolor";
    const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
    mockExecCommand.mockReturnValue({
      stdout: "tmux 3.4",
      stderr: "",
@ -44,7 +45,8 @@ describe("TmuxAdapter", () => {
  });
  it("creates a detached team session when not already inside tmux", () => {
-    mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
+    const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
    mockExecCommand.mockImplementation((_bin, args) => {
      if (args[0] === "has-session") {
        return { stdout: "", stderr: "missing", status: 1 };
      }
@ -65,12 +67,18 @@ describe("TmuxAdapter", () => {
    expect(mockExecCommand).toHaveBeenCalledWith(
      "tmux",
-      expect.arrayContaining(["new-session", "-d", "-s", "companion-teams-demo"]),
+      expect.arrayContaining([
        "new-session",
        "-d",
        "-s",
        "companion-teams-demo",
      ]),
    );
  });
  it("splits an existing detached session when not already inside tmux", () => {
-    mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
+    const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
    mockExecCommand.mockImplementation((_bin, args) => {
      if (args[0] === "has-session") {
        return { stdout: "", stderr: "", status: 0 };
      }
@ -96,6 +104,7 @@ describe("TmuxAdapter", () => {
  });
  it("checks pane liveness by pane id", () => {
    const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
    mockExecCommand.mockReturnValue({
      stdout: "%1\n%7\n",
      stderr: "",
--- a/packages/companion-teams/src/adapters/wezterm-adapter.test.ts
+++ b/packages/companion-teams/src/adapters/wezterm-adapter.test.ts
@ -8,11 +8,10 @@ import { WezTermAdapter } from "./wezterm-adapter";
 describe("WezTermAdapter", () => {
  let adapter: WezTermAdapter;
  let mockExecCommand: ReturnType<typeof vi.spyOn>;
  beforeEach(() => {
    adapter = new WezTermAdapter();
-    mockExecCommand = vi.spyOn(terminalAdapter, "execCommand");
+    vi.spyOn(terminalAdapter, "execCommand");
    delete process.env.WEZTERM_PANE;
    delete process.env.TMUX;
    delete process.env.ZELLIJ;
@ -31,6 +30,7 @@ describe("WezTermAdapter", () => {
  describe("detect", () => {
    it("should detect when WEZTERM_PANE is set", () => {
      const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
      mockExecCommand.mockReturnValue({
        stdout: "version 1.0",
        stderr: "",
@ -43,7 +43,8 @@ describe("WezTermAdapter", () => {
  describe("spawn", () => {
    it("should spawn first pane to the right with 50%", () => {
      // Mock getPanes finding only current pane
-      mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
+      const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
      mockExecCommand.mockImplementation((_bin, args) => {
        if (args.includes("list")) {
          return {
            stdout: JSON.stringify([{ pane_id: 0, tab_id: 0 }]),
@ -79,7 +80,8 @@ describe("WezTermAdapter", () => {
    it("should spawn subsequent panes by splitting the sidebar", () => {
      // Mock getPanes finding current pane (0) and sidebar pane (1)
-      mockExecCommand.mockImplementation((_bin: string, args: string[]) => {
+      const mockExecCommand = vi.mocked(terminalAdapter.execCommand);
      mockExecCommand.mockImplementation((_bin, args) => {
        if (args.includes("list")) {
          return {
            stdout: JSON.stringify([