diff --git a/packages/coding-agent/src/core/tools/browser.ts b/packages/coding-agent/src/core/tools/browser.ts index 8104a73..041493f 100644 --- a/packages/coding-agent/src/core/tools/browser.ts +++ b/packages/coding-agent/src/core/tools/browser.ts @@ -6,68 +6,81 @@ import { join, resolve } from "node:path"; import type { AgentTool } from "@mariozechner/pi-agent-core"; import { type Static, Type } from "@sinclair/typebox"; import { getAgentDir } from "../../config.js"; -import { getShellEnv, killProcessTree, sanitizeBinaryOutput } from "../../utils/shell.js"; +import { + getShellEnv, + killProcessTree, + sanitizeBinaryOutput, +} from "../../utils/shell.js"; const browserActions = [ - "open", - "snapshot", - "click", - "fill", - "wait", - "screenshot", - "state_save", - "state_load", - "close", + "open", + "snapshot", + "click", + "fill", + "wait", + "screenshot", + "state_save", + "state_load", + "close", ] as const; const browserSnapshotModes = ["interactive", "full"] as const; const browserLoadStates = ["load", "domcontentloaded", "networkidle"] as const; -const DEFAULT_BROWSER_COMMAND = process.env.PI_AGENT_BROWSER_COMMAND || "agent-browser"; +const DEFAULT_BROWSER_COMMAND = + process.env.PI_AGENT_BROWSER_COMMAND || "agent-browser"; const DEFAULT_BROWSER_TIMEOUT_SECONDS = 90; const browserSchema = Type.Object({ - action: Type.Union( - browserActions.map((action) => Type.Literal(action)), - { description: "Browser action to execute" }, - ), - url: Type.Optional(Type.String({ description: "URL to open, or URL glob to wait for" })), - mode: Type.Optional( - Type.Union( - browserSnapshotModes.map((mode) => Type.Literal(mode)), - { description: "Snapshot mode. Defaults to interactive." }, - ), - ), - ref: Type.Optional( - Type.String({ - description: "Element ref from snapshot output, such as @e2", - }), - ), - value: Type.Optional(Type.String({ description: "Text value to fill into a field" })), - text: Type.Optional(Type.String({ description: "Visible text to wait for" })), - ms: Type.Optional( - Type.Number({ - description: "Milliseconds to wait", - minimum: 0, - }), - ), - loadState: Type.Optional( - Type.Union( - browserLoadStates.map((state) => Type.Literal(state)), - { description: "Page load state to wait for" }, - ), - ), - path: Type.Optional( - Type.String({ - description: "Output path for screenshots, relative to the current working directory if not absolute", - }), - ), - fullPage: Type.Optional(Type.Boolean({ description: "Capture a full-page screenshot" })), - stateName: Type.Optional( - Type.String({ - description: "Named browser state checkpoint stored under ~/.pi/agent/browser/states/", - }), - ), + action: Type.Union( + browserActions.map((action) => Type.Literal(action)), + { description: "Browser action to execute" }, + ), + url: Type.Optional( + Type.String({ description: "URL to open, or URL glob to wait for" }), + ), + mode: Type.Optional( + Type.Union( + browserSnapshotModes.map((mode) => Type.Literal(mode)), + { description: "Snapshot mode. Defaults to interactive." }, + ), + ), + ref: Type.Optional( + Type.String({ + description: "Element ref from snapshot output, such as @e2", + }), + ), + value: Type.Optional( + Type.String({ description: "Text value to fill into a field" }), + ), + text: Type.Optional(Type.String({ description: "Visible text to wait for" })), + ms: Type.Optional( + Type.Number({ + description: "Milliseconds to wait", + minimum: 0, + }), + ), + loadState: Type.Optional( + Type.Union( + browserLoadStates.map((state) => Type.Literal(state)), + { description: "Page load state to wait for" }, + ), + ), + path: Type.Optional( + Type.String({ + description: + "Output path for screenshots, relative to the current working directory if not absolute", + }), + ), + fullPage: Type.Optional( + Type.Boolean({ description: "Capture a full-page screenshot" }), + ), + stateName: Type.Optional( + Type.String({ + description: + "Named browser state checkpoint stored under ~/.pi/agent/browser/states/", + }), + ), }); export type BrowserToolAction = (typeof browserActions)[number]; @@ -76,433 +89,483 @@ export type BrowserLoadState = (typeof browserLoadStates)[number]; export type BrowserToolInput = Static; export interface BrowserToolDetails { - action: BrowserToolAction; - command: string; - args: string[]; - profilePath: string; - screenshotPath?: string; - statePath?: string; + action: BrowserToolAction; + command: string; + args: string[]; + profilePath: string; + screenshotPath?: string; + statePath?: string; } export interface BrowserOperations { - exec: ( - command: string, - args: string[], - options: { - cwd: string; - env: NodeJS.ProcessEnv; - onData: (data: Buffer) => void; - signal?: AbortSignal; - timeout?: number; - }, - ) => Promise<{ exitCode: number | null }>; + exec: ( + command: string, + args: string[], + options: { + cwd: string; + env: NodeJS.ProcessEnv; + onData: (data: Buffer) => void; + signal?: AbortSignal; + timeout?: number; + }, + ) => Promise<{ exitCode: number | null }>; } const defaultBrowserOperations: BrowserOperations = { - exec: (command, args, { cwd, env, onData, signal, timeout }) => { - return new Promise((resolvePromise, rejectPromise) => { - const child = spawn(command, args, { - cwd, - detached: true, - env, - stdio: ["ignore", "pipe", "pipe"], - }); + exec: (command, args, { cwd, env, onData, signal, timeout }) => { + return new Promise((resolvePromise, rejectPromise) => { + const child = spawn(command, args, { + cwd, + detached: true, + env, + stdio: ["ignore", "pipe", "pipe"], + }); - let timedOut = false; - let timeoutHandle: NodeJS.Timeout | undefined; + let timedOut = false; + let timeoutHandle: NodeJS.Timeout | undefined; - if (timeout !== undefined && timeout > 0) { - timeoutHandle = setTimeout(() => { - timedOut = true; - if (child.pid) { - killProcessTree(child.pid); - } - }, timeout * 1000); - } + if (timeout !== undefined && timeout > 0) { + timeoutHandle = setTimeout(() => { + timedOut = true; + if (child.pid) { + killProcessTree(child.pid); + } + }, timeout * 1000); + } - if (child.stdout) { - child.stdout.on("data", onData); - } - if (child.stderr) { - child.stderr.on("data", onData); - } + if (child.stdout) { + child.stdout.on("data", onData); + } + if (child.stderr) { + child.stderr.on("data", onData); + } - const onAbort = () => { - if (child.pid) { - killProcessTree(child.pid); - } - }; + const onAbort = () => { + if (child.pid) { + killProcessTree(child.pid); + } + }; - if (signal) { - if (signal.aborted) { - onAbort(); - } else { - signal.addEventListener("abort", onAbort, { once: true }); - } - } + if (signal) { + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener("abort", onAbort, { once: true }); + } + } - child.on("error", (error) => { - if (timeoutHandle) clearTimeout(timeoutHandle); - if (signal) signal.removeEventListener("abort", onAbort); - rejectPromise(error); - }); + child.on("error", (error) => { + if (timeoutHandle) clearTimeout(timeoutHandle); + if (signal) signal.removeEventListener("abort", onAbort); + rejectPromise(error); + }); - child.on("close", (code) => { - if (timeoutHandle) clearTimeout(timeoutHandle); - if (signal) signal.removeEventListener("abort", onAbort); + child.on("close", (code) => { + if (timeoutHandle) clearTimeout(timeoutHandle); + if (signal) signal.removeEventListener("abort", onAbort); - if (signal?.aborted) { - rejectPromise(new Error("aborted")); - return; - } + if (signal?.aborted) { + rejectPromise(new Error("aborted")); + return; + } - if (timedOut) { - rejectPromise(new Error(`timeout:${timeout}`)); - return; - } + if (timedOut) { + rejectPromise(new Error(`timeout:${timeout}`)); + return; + } - resolvePromise({ exitCode: code }); - }); - }); - }, + resolvePromise({ exitCode: code }); + }); + }); + }, }; export interface BrowserToolOptions { - operations?: BrowserOperations; - command?: string; - defaultTimeoutSeconds?: number; - profileDir?: string; - stateDir?: string; - agentDir?: string; + operations?: BrowserOperations; + command?: string; + defaultTimeoutSeconds?: number; + profileDir?: string; + stateDir?: string; + agentDir?: string; } interface BrowserCommandContext { - action: BrowserToolAction; - args: string[]; - statusMessage: string; - successMessage: string; - profilePath: string; - screenshotPath?: string; - statePath?: string; + action: BrowserToolAction; + args: string[]; + statusMessage: string; + successMessage: string; + profilePath: string; + screenshotPath?: string; + statePath?: string; } -type BrowserCommandContextWithoutProfile = Omit; +type BrowserCommandContextWithoutProfile = Omit< + BrowserCommandContext, + "profilePath" +>; function resolveCommandPath(cwd: string, inputPath: string): string { - return resolve(cwd, inputPath); + return resolve(cwd, inputPath); } function getBrowserRootDir(options?: BrowserToolOptions): string { - const baseAgentDir = options?.agentDir ?? getAgentDir(); - return join(baseAgentDir, "browser"); + const baseAgentDir = options?.agentDir ?? getAgentDir(); + return join(baseAgentDir, "browser"); } -function getBrowserProfilePath(cwd: string, options?: BrowserToolOptions): string { - const profilePath = options?.profileDir ?? join(getBrowserRootDir(options), "profile"); - return resolveCommandPath(cwd, profilePath); +function getBrowserProfilePath( + cwd: string, + options?: BrowserToolOptions, +): string { + const profilePath = + options?.profileDir ?? join(getBrowserRootDir(options), "profile"); + return resolveCommandPath(cwd, profilePath); } function getBrowserStateDir(cwd: string, options?: BrowserToolOptions): string { - const stateDir = options?.stateDir ?? join(getBrowserRootDir(options), "states"); - return resolveCommandPath(cwd, stateDir); + const stateDir = + options?.stateDir ?? join(getBrowserRootDir(options), "states"); + return resolveCommandPath(cwd, stateDir); } function createTempScreenshotPath(): string { - const id = randomBytes(8).toString("hex"); - return join(tmpdir(), `pi-browser-screenshot-${id}.png`); + const id = randomBytes(8).toString("hex"); + return join(tmpdir(), `pi-browser-screenshot-${id}.png`); } function normalizeOutput(chunks: Buffer[]): string { - return sanitizeBinaryOutput(Buffer.concat(chunks).toString("utf-8")).trim(); + return sanitizeBinaryOutput(Buffer.concat(chunks).toString("utf-8")).trim(); } function sanitizeStateName(stateName: string): string { - const trimmed = stateName.trim(); - if (trimmed.length === 0) { - throw new Error("stateName is required for browser state actions"); - } + const trimmed = stateName.trim(); + if (trimmed.length === 0) { + throw new Error("stateName is required for browser state actions"); + } - const withoutJsonSuffix = trimmed.endsWith(".json") ? trimmed.slice(0, -".json".length) : trimmed; - const sanitized = withoutJsonSuffix.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, ""); + const withoutJsonSuffix = trimmed.endsWith(".json") + ? trimmed.slice(0, -".json".length) + : trimmed; + const sanitized = withoutJsonSuffix + .replace(/[^a-zA-Z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, ""); - if (sanitized.length === 0) { - throw new Error(`Invalid browser state name: "${stateName}"`); - } + if (sanitized.length === 0) { + throw new Error(`Invalid browser state name: "${stateName}"`); + } - return sanitized; + return sanitized; } function ensureBrowserDirs(profilePath: string, stateDir: string): void { - mkdirSync(profilePath, { recursive: true }); - mkdirSync(stateDir, { recursive: true }); + mkdirSync(profilePath, { recursive: true }); + mkdirSync(stateDir, { recursive: true }); } function createBrowserCommandContext( - profilePath: string, - stateDir: string, - context: BrowserCommandContextWithoutProfile, + profilePath: string, + stateDir: string, + context: BrowserCommandContextWithoutProfile, ): BrowserCommandContext { - ensureBrowserDirs(profilePath, stateDir); - return { - ...context, - profilePath, - }; + ensureBrowserDirs(profilePath, stateDir); + return { + ...context, + profilePath, + }; } -function buildWaitArgs(input: BrowserToolInput): { args: string[]; status: string } { - const targets = [ - input.ref !== undefined ? "ref" : undefined, - input.url !== undefined ? "url" : undefined, - input.text !== undefined ? "text" : undefined, - input.ms !== undefined ? "ms" : undefined, - input.loadState !== undefined ? "loadState" : undefined, - ].filter((target): target is string => target !== undefined); +function buildWaitArgs(input: BrowserToolInput): { + args: string[]; + status: string; +} { + const targets = [ + input.ref !== undefined ? "ref" : undefined, + input.url !== undefined ? "url" : undefined, + input.text !== undefined ? "text" : undefined, + input.ms !== undefined ? "ms" : undefined, + input.loadState !== undefined ? "loadState" : undefined, + ].filter((target): target is string => target !== undefined); - if (targets.length !== 1) { - throw new Error("browser wait requires exactly one of ref, url, text, ms, or loadState"); - } + if (targets.length !== 1) { + throw new Error( + "browser wait requires exactly one of ref, url, text, ms, or loadState", + ); + } - if (input.ref !== undefined) { - return { args: ["wait", input.ref], status: `Waiting for ${input.ref}...` }; - } - if (input.url !== undefined) { - return { - args: ["wait", "--url", input.url], - status: `Waiting for URL ${input.url}...`, - }; - } - if (input.text !== undefined) { - return { - args: ["wait", "--text", input.text], - status: `Waiting for text "${input.text}"...`, - }; - } - if (input.ms !== undefined) { - return { - args: ["wait", String(input.ms)], - status: `Waiting ${input.ms}ms...`, - }; - } + if (input.ref !== undefined) { + return { args: ["wait", input.ref], status: `Waiting for ${input.ref}...` }; + } + if (input.url !== undefined) { + return { + args: ["wait", "--url", input.url], + status: `Waiting for URL ${input.url}...`, + }; + } + if (input.text !== undefined) { + return { + args: ["wait", "--text", input.text], + status: `Waiting for text "${input.text}"...`, + }; + } + if (input.ms !== undefined) { + return { + args: ["wait", String(input.ms)], + status: `Waiting ${input.ms}ms...`, + }; + } - return { - args: ["wait", "--load", input.loadState!], - status: `Waiting for load state ${input.loadState}...`, - }; + return { + args: ["wait", "--load", input.loadState!], + status: `Waiting for load state ${input.loadState}...`, + }; } function buildBrowserCommand( - cwd: string, - input: BrowserToolInput, - options?: BrowserToolOptions, + cwd: string, + input: BrowserToolInput, + options?: BrowserToolOptions, ): BrowserCommandContext { - const profilePath = getBrowserProfilePath(cwd, options); - const stateDir = getBrowserStateDir(cwd, options); - const baseArgs = ["--profile", profilePath]; + const profilePath = getBrowserProfilePath(cwd, options); + const stateDir = getBrowserStateDir(cwd, options); + const baseArgs = ["--profile", profilePath]; - switch (input.action) { - case "open": { - if (!input.url) { - throw new Error("browser open requires url"); - } - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "open", input.url], - statusMessage: `Opening ${input.url}...`, - successMessage: `Opened ${input.url}`, - }); - } - case "snapshot": { - const mode = input.mode ?? "interactive"; - const args = mode === "interactive" ? [...baseArgs, "snapshot", "-i"] : [...baseArgs, "snapshot"]; - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args, - statusMessage: "Capturing browser snapshot...", - successMessage: "Captured browser snapshot", - }); - } - case "click": { - if (!input.ref) { - throw new Error("browser click requires ref"); - } - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "click", input.ref], - statusMessage: `Clicking ${input.ref}...`, - successMessage: `Clicked ${input.ref}`, - }); - } - case "fill": { - if (!input.ref || input.value === undefined) { - throw new Error("browser fill requires ref and value"); - } - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "fill", input.ref, input.value], - statusMessage: `Filling ${input.ref}...`, - successMessage: `Filled ${input.ref}`, - }); - } - case "wait": { - const wait = buildWaitArgs(input); - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, ...wait.args], - statusMessage: wait.status, - successMessage: "Browser wait condition satisfied", - }); - } - case "screenshot": { - const screenshotPath = input.path ? resolveCommandPath(cwd, input.path) : createTempScreenshotPath(); - const args = [...baseArgs, "screenshot"]; - if (input.fullPage) { - args.push("--full"); - } - args.push(screenshotPath); + switch (input.action) { + case "open": { + if (!input.url) { + throw new Error("browser open requires url"); + } + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "open", input.url], + statusMessage: `Opening ${input.url}...`, + successMessage: `Opened ${input.url}`, + }); + } + case "snapshot": { + const mode = input.mode ?? "interactive"; + const args = + mode === "interactive" + ? [...baseArgs, "snapshot", "-i"] + : [...baseArgs, "snapshot"]; + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args, + statusMessage: "Capturing browser snapshot...", + successMessage: "Captured browser snapshot", + }); + } + case "click": { + if (!input.ref) { + throw new Error("browser click requires ref"); + } + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "click", input.ref], + statusMessage: `Clicking ${input.ref}...`, + successMessage: `Clicked ${input.ref}`, + }); + } + case "fill": { + if (!input.ref || input.value === undefined) { + throw new Error("browser fill requires ref and value"); + } + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "fill", input.ref, input.value], + statusMessage: `Filling ${input.ref}...`, + successMessage: `Filled ${input.ref}`, + }); + } + case "wait": { + const wait = buildWaitArgs(input); + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, ...wait.args], + statusMessage: wait.status, + successMessage: "Browser wait condition satisfied", + }); + } + case "screenshot": { + const screenshotPath = input.path + ? resolveCommandPath(cwd, input.path) + : createTempScreenshotPath(); + const args = [...baseArgs, "screenshot"]; + if (input.fullPage) { + args.push("--full"); + } + args.push(screenshotPath); - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args, - statusMessage: "Taking browser screenshot...", - successMessage: `Saved browser screenshot to ${screenshotPath}`, - screenshotPath, - }); - } - case "state_save": { - if (!input.stateName) { - throw new Error("browser state_save requires stateName"); - } - const statePath = join(stateDir, `${sanitizeStateName(input.stateName)}.json`); - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "state", "save", statePath], - statusMessage: `Saving browser state "${input.stateName}"...`, - successMessage: `Saved browser state "${input.stateName}" to ${statePath}`, - statePath, - }); - } - case "state_load": { - if (!input.stateName) { - throw new Error("browser state_load requires stateName"); - } - const statePath = join(stateDir, `${sanitizeStateName(input.stateName)}.json`); - if (!existsSync(statePath)) { - throw new Error(`Saved browser state "${input.stateName}" not found at ${statePath}`); - } - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "state", "load", statePath], - statusMessage: `Loading browser state "${input.stateName}"...`, - successMessage: `Loaded browser state "${input.stateName}" from ${statePath}`, - statePath, - }); - } - case "close": - return createBrowserCommandContext(profilePath, stateDir, { - action: input.action, - args: [...baseArgs, "close"], - statusMessage: "Closing browser...", - successMessage: "Closed browser", - }); - default: { - const unsupportedAction: never = input.action; - throw new Error(`Unsupported browser action: ${unsupportedAction}`); - } - } + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args, + statusMessage: "Taking browser screenshot...", + successMessage: `Saved browser screenshot to ${screenshotPath}`, + screenshotPath, + }); + } + case "state_save": { + if (!input.stateName) { + throw new Error("browser state_save requires stateName"); + } + const statePath = join( + stateDir, + `${sanitizeStateName(input.stateName)}.json`, + ); + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "state", "save", statePath], + statusMessage: `Saving browser state "${input.stateName}"...`, + successMessage: `Saved browser state "${input.stateName}" to ${statePath}`, + statePath, + }); + } + case "state_load": { + if (!input.stateName) { + throw new Error("browser state_load requires stateName"); + } + const statePath = join( + stateDir, + `${sanitizeStateName(input.stateName)}.json`, + ); + if (!existsSync(statePath)) { + throw new Error( + `Saved browser state "${input.stateName}" not found at ${statePath}`, + ); + } + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "state", "load", statePath], + statusMessage: `Loading browser state "${input.stateName}"...`, + successMessage: `Loaded browser state "${input.stateName}" from ${statePath}`, + statePath, + }); + } + case "close": + return createBrowserCommandContext(profilePath, stateDir, { + action: input.action, + args: [...baseArgs, "close"], + statusMessage: "Closing browser...", + successMessage: "Closed browser", + }); + default: { + const unsupportedAction: never = input.action; + throw new Error(`Unsupported browser action: ${unsupportedAction}`); + } + } } -function buildBrowserErrorMessage(action: BrowserToolAction, output: string, exitCode: number | null): string { - const base = - exitCode === null - ? `Browser action "${action}" failed` - : `Browser action "${action}" exited with code ${exitCode}`; - return output.length > 0 ? `${output}\n\n${base}` : base; +function buildBrowserErrorMessage( + action: BrowserToolAction, + output: string, + exitCode: number | null, +): string { + const base = + exitCode === null + ? `Browser action "${action}" failed` + : `Browser action "${action}" exited with code ${exitCode}`; + return output.length > 0 ? `${output}\n\n${base}` : base; } function getMissingBrowserCommandMessage(command: string): string { - return [ - `Browser tool could not find "${command}".`, - "Install agent-browser so the first-class browser tool can run.", - "Recommended setup:", - " npm install -g agent-browser", - " agent-browser install", - "If Chromium lives at a custom path, set AGENT_BROWSER_EXECUTABLE_PATH.", - ].join("\n"); + return [ + `Browser tool could not find "${command}".`, + "Install agent-browser so the first-class browser tool can run.", + "Recommended setup:", + " npm install -g agent-browser", + " agent-browser install", + "If Chromium lives at a custom path, set AGENT_BROWSER_EXECUTABLE_PATH.", + ].join("\n"); } -export function createBrowserTool(cwd: string, options?: BrowserToolOptions): AgentTool { - const operations = options?.operations ?? defaultBrowserOperations; - const command = options?.command ?? DEFAULT_BROWSER_COMMAND; - const defaultTimeoutSeconds = options?.defaultTimeoutSeconds ?? DEFAULT_BROWSER_TIMEOUT_SECONDS; +export function createBrowserTool( + cwd: string, + options?: BrowserToolOptions, +): AgentTool { + const operations = options?.operations ?? defaultBrowserOperations; + const command = options?.command ?? DEFAULT_BROWSER_COMMAND; + const defaultTimeoutSeconds = + options?.defaultTimeoutSeconds ?? DEFAULT_BROWSER_TIMEOUT_SECONDS; - return { - name: "browser", - label: "browser", - description: - "Use a persistent browser for websites: open pages, inspect them with snapshot, click or fill elements, wait for changes, take screenshots, and save or load named browser state.", - parameters: browserSchema, - execute: async (_toolCallId, input, signal, onUpdate) => { - const commandContext = buildBrowserCommand(cwd, input, options); - const details: BrowserToolDetails = { - action: commandContext.action, - command, - args: commandContext.args, - profilePath: commandContext.profilePath, - screenshotPath: commandContext.screenshotPath, - statePath: commandContext.statePath, - }; + return { + name: "browser", + label: "browser", + description: + "Use a persistent browser for websites: open pages, inspect them with snapshot, click or fill elements, wait for changes, take screenshots, and save or load named browser state.", + parameters: browserSchema, + execute: async (_toolCallId, input, signal, onUpdate) => { + const commandContext = buildBrowserCommand(cwd, input, options); + const details: BrowserToolDetails = { + action: commandContext.action, + command, + args: commandContext.args, + profilePath: commandContext.profilePath, + screenshotPath: commandContext.screenshotPath, + statePath: commandContext.statePath, + }; - onUpdate?.({ - content: [{ type: "text", text: commandContext.statusMessage }], - details, - }); + onUpdate?.({ + content: [{ type: "text", text: commandContext.statusMessage }], + details, + }); - const chunks: Buffer[] = []; + const chunks: Buffer[] = []; - try { - const { exitCode } = await operations.exec(command, commandContext.args, { - cwd, - env: getShellEnv(), - onData: (data) => chunks.push(data), - signal, - timeout: defaultTimeoutSeconds, - }); + try { + const { exitCode } = await operations.exec( + command, + commandContext.args, + { + cwd, + env: getShellEnv(), + onData: (data) => chunks.push(data), + signal, + timeout: defaultTimeoutSeconds, + }, + ); - const output = normalizeOutput(chunks); - if (exitCode !== 0) { - throw new Error(buildBrowserErrorMessage(commandContext.action, output, exitCode)); - } + const output = normalizeOutput(chunks); + if (exitCode !== 0) { + throw new Error( + buildBrowserErrorMessage(commandContext.action, output, exitCode), + ); + } - if (commandContext.action === "snapshot") { - if (output.length === 0) { - throw new Error("Browser snapshot returned no output"); - } - return { - content: [{ type: "text", text: output }], - details, - }; - } + if (commandContext.action === "snapshot") { + if (output.length === 0) { + throw new Error("Browser snapshot returned no output"); + } + return { + content: [{ type: "text", text: output }], + details, + }; + } - const text = output.length > 0 ? output : commandContext.successMessage; - return { - content: [{ type: "text", text }], - details, - }; - } catch (error) { - if (error instanceof Error && "code" in error && error.code === "ENOENT") { - throw new Error(getMissingBrowserCommandMessage(command)); - } - if (error instanceof Error && error.message === "aborted") { - throw new Error(`Browser action "${commandContext.action}" aborted`); - } - if (error instanceof Error && error.message.startsWith("timeout:")) { - const seconds = error.message.split(":")[1]; - throw new Error(`Browser action "${commandContext.action}" timed out after ${seconds} seconds`); - } - throw error; - } - }, - }; + const text = output.length > 0 ? output : commandContext.successMessage; + return { + content: [{ type: "text", text }], + details, + }; + } catch (error) { + if ( + error instanceof Error && + "code" in error && + error.code === "ENOENT" + ) { + throw new Error(getMissingBrowserCommandMessage(command)); + } + if (error instanceof Error && error.message === "aborted") { + throw new Error(`Browser action "${commandContext.action}" aborted`); + } + if (error instanceof Error && error.message.startsWith("timeout:")) { + const seconds = error.message.split(":")[1]; + throw new Error( + `Browser action "${commandContext.action}" timed out after ${seconds} seconds`, + ); + } + throw error; + } + }, + }; } export const browserTool = createBrowserTool(process.cwd()); diff --git a/packages/coding-agent/test/browser-tool.test.ts b/packages/coding-agent/test/browser-tool.test.ts index fbf9ee3..569b03f 100644 --- a/packages/coding-agent/test/browser-tool.test.ts +++ b/packages/coding-agent/test/browser-tool.test.ts @@ -5,270 +5,288 @@ import { afterEach, describe, expect, it } from "vitest"; import { parseArgs } from "../src/cli/args.js"; import { buildSystemPrompt } from "../src/core/system-prompt.js"; import { - type BrowserOperations, - type BrowserToolDetails, - createAllTools, - createBrowserTool, - defaultCodingToolNames, + type BrowserOperations, + type BrowserToolDetails, + createAllTools, + createBrowserTool, + defaultCodingToolNames, } from "../src/core/tools/index.js"; interface TextBlock { - type: "text"; - text: string; + type: "text"; + text: string; } type ToolContentBlock = TextBlock | { type: string }; interface ToolResultLike { - content: ToolContentBlock[]; - details?: unknown; + content: ToolContentBlock[]; + details?: unknown; } interface BrowserExecCall { - command: string; - args: string[]; - cwd: string; - env: NodeJS.ProcessEnv; - timeout?: number; + command: string; + args: string[]; + cwd: string; + env: NodeJS.ProcessEnv; + timeout?: number; } function getTextOutput(result: ToolResultLike): string { - return result.content - .filter((block): block is TextBlock => block.type === "text") - .map((block) => block.text) - .join("\n"); + return result.content + .filter((block): block is TextBlock => block.type === "text") + .map((block) => block.text) + .join("\n"); } function createMockBrowserOperations( - output = "", - exitCode: number | null = 0, + output = "", + exitCode: number | null = 0, ): { - calls: BrowserExecCall[]; - operations: BrowserOperations; + calls: BrowserExecCall[]; + operations: BrowserOperations; } { - const calls: BrowserExecCall[] = []; + const calls: BrowserExecCall[] = []; - return { - calls, - operations: { - exec: async (command, args, options) => { - calls.push({ - command, - args, - cwd: options.cwd, - env: options.env, - timeout: options.timeout, - }); - if (output.length > 0) { - options.onData(Buffer.from(output, "utf-8")); - } - return { exitCode }; - }, - }, - }; + return { + calls, + operations: { + exec: async (command, args, options) => { + calls.push({ + command, + args, + cwd: options.cwd, + env: options.env, + timeout: options.timeout, + }); + if (output.length > 0) { + options.onData(Buffer.from(output, "utf-8")); + } + return { exitCode }; + }, + }, + }; } describe("browser tool", () => { - const tempDirs: string[] = []; + const tempDirs: string[] = []; - afterEach(() => { - while (tempDirs.length > 0) { - const tempDir = tempDirs.pop(); - if (tempDir) { - rmSync(tempDir, { recursive: true, force: true }); - } - } - }); + afterEach(() => { + while (tempDirs.length > 0) { + const tempDir = tempDirs.pop(); + if (tempDir) { + rmSync(tempDir, { recursive: true, force: true }); + } + } + }); - function createTempDir(prefix: string): string { - const tempDir = mkdtempSync(join(tmpdir(), prefix)); - tempDirs.push(tempDir); - return tempDir; - } + function createTempDir(prefix: string): string { + const tempDir = mkdtempSync(join(tmpdir(), prefix)); + tempDirs.push(tempDir); + return tempDir; + } - it("opens pages through agent-browser with a shared profile", async () => { - const cwd = createTempDir("coding-agent-browser-open-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { calls, operations } = createMockBrowserOperations(); + it("opens pages through agent-browser with a shared profile", async () => { + const cwd = createTempDir("coding-agent-browser-open-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { calls, operations } = createMockBrowserOperations(); - const browserTool = createBrowserTool(cwd, { - operations, - command: "agent-browser-test", - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + command: "agent-browser-test", + profileDir, + stateDir, + }); - const result = (await browserTool.execute("browser-open", { - action: "open", - url: "https://example.com", - })) as ToolResultLike; + const result = (await browserTool.execute("browser-open", { + action: "open", + url: "https://example.com", + })) as ToolResultLike; - expect(calls).toHaveLength(1); - expect(calls[0]).toMatchObject({ - command: "agent-browser-test", - args: ["--profile", profileDir, "open", "https://example.com"], - cwd, - timeout: 90, - }); - expect(getTextOutput(result)).toBe("Opened https://example.com"); + expect(calls).toHaveLength(1); + expect(calls[0]).toMatchObject({ + command: "agent-browser-test", + args: ["--profile", profileDir, "open", "https://example.com"], + cwd, + timeout: 90, + }); + expect(getTextOutput(result)).toBe("Opened https://example.com"); - const details = result.details as BrowserToolDetails | undefined; - expect(details?.profilePath).toBe(profileDir); - }); + const details = result.details as BrowserToolDetails | undefined; + expect(details?.profilePath).toBe(profileDir); + }); - it("uses interactive snapshots by default and returns snapshot text", async () => { - const cwd = createTempDir("coding-agent-browser-snapshot-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { calls, operations } = createMockBrowserOperations("main [ref=@e1]\nbutton [ref=@e2] Sign in"); + it("uses interactive snapshots by default and returns snapshot text", async () => { + const cwd = createTempDir("coding-agent-browser-snapshot-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { calls, operations } = createMockBrowserOperations( + "main [ref=@e1]\nbutton [ref=@e2] Sign in", + ); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - const result = (await browserTool.execute("browser-snapshot", { - action: "snapshot", - })) as ToolResultLike; + const result = (await browserTool.execute("browser-snapshot", { + action: "snapshot", + })) as ToolResultLike; - expect(calls[0]?.args).toEqual(["--profile", profileDir, "snapshot", "-i"]); - expect(getTextOutput(result)).toContain("button [ref=@e2] Sign in"); - }); + expect(calls[0]?.args).toEqual(["--profile", profileDir, "snapshot", "-i"]); + expect(getTextOutput(result)).toContain("button [ref=@e2] Sign in"); + }); - it("validates wait targets before spawning agent-browser", async () => { - const cwd = createTempDir("coding-agent-browser-wait-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { calls, operations } = createMockBrowserOperations(); + it("validates wait targets before spawning agent-browser", async () => { + const cwd = createTempDir("coding-agent-browser-wait-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { calls, operations } = createMockBrowserOperations(); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - await expect( - browserTool.execute("browser-wait-missing", { - action: "wait", - }), - ).rejects.toThrow("browser wait requires exactly one of ref, url, text, ms, or loadState"); + await expect( + browserTool.execute("browser-wait-missing", { + action: "wait", + }), + ).rejects.toThrow( + "browser wait requires exactly one of ref, url, text, ms, or loadState", + ); - await expect( - browserTool.execute("browser-wait-ambiguous", { - action: "wait", - ref: "@e2", - text: "Done", - }), - ).rejects.toThrow("browser wait requires exactly one of ref, url, text, ms, or loadState"); + await expect( + browserTool.execute("browser-wait-ambiguous", { + action: "wait", + ref: "@e2", + text: "Done", + }), + ).rejects.toThrow( + "browser wait requires exactly one of ref, url, text, ms, or loadState", + ); - expect(calls).toHaveLength(0); - }); + expect(calls).toHaveLength(0); + }); - it("preserves empty string wait targets instead of falling through to loadState", async () => { - const cwd = createTempDir("coding-agent-browser-wait-empty-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { calls, operations } = createMockBrowserOperations(); + it("preserves empty string wait targets instead of falling through to loadState", async () => { + const cwd = createTempDir("coding-agent-browser-wait-empty-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { calls, operations } = createMockBrowserOperations(); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - await browserTool.execute("browser-wait-empty-text", { - action: "wait", - text: "", - }); + await browserTool.execute("browser-wait-empty-text", { + action: "wait", + text: "", + }); - expect(calls[0]?.args).toEqual(["--profile", profileDir, "wait", "--text", ""]); - }); + expect(calls[0]?.args).toEqual([ + "--profile", + profileDir, + "wait", + "--text", + "", + ]); + }); - it("does not create browser directories when validation fails before command construction", async () => { - const cwd = createTempDir("coding-agent-browser-invalid-open-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { operations } = createMockBrowserOperations(); + it("does not create browser directories when validation fails before command construction", async () => { + const cwd = createTempDir("coding-agent-browser-invalid-open-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { operations } = createMockBrowserOperations(); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - await expect( - browserTool.execute("browser-open-missing-url", { - action: "open", - }), - ).rejects.toThrow("browser open requires url"); + await expect( + browserTool.execute("browser-open-missing-url", { + action: "open", + }), + ).rejects.toThrow("browser open requires url"); - expect(existsSync(profileDir)).toBe(false); - expect(existsSync(stateDir)).toBe(false); - }); + expect(existsSync(profileDir)).toBe(false); + expect(existsSync(stateDir)).toBe(false); + }); - it("stores named state under the managed browser state directory", async () => { - const cwd = createTempDir("coding-agent-browser-state-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { calls, operations } = createMockBrowserOperations(); + it("stores named state under the managed browser state directory", async () => { + const cwd = createTempDir("coding-agent-browser-state-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { calls, operations } = createMockBrowserOperations(); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - const result = (await browserTool.execute("browser-state-save", { - action: "state_save", - stateName: "my session/prod", - })) as ToolResultLike; + const result = (await browserTool.execute("browser-state-save", { + action: "state_save", + stateName: "my session/prod", + })) as ToolResultLike; - const expectedStatePath = join(stateDir, "my-session-prod.json"); - expect(calls[0]?.args).toEqual(["--profile", profileDir, "state", "save", expectedStatePath]); + const expectedStatePath = join(stateDir, "my-session-prod.json"); + expect(calls[0]?.args).toEqual([ + "--profile", + profileDir, + "state", + "save", + expectedStatePath, + ]); - const details = result.details as BrowserToolDetails | undefined; - expect(details?.statePath).toBe(expectedStatePath); - expect(getTextOutput(result)).toContain(expectedStatePath); - }); + const details = result.details as BrowserToolDetails | undefined; + expect(details?.statePath).toBe(expectedStatePath); + expect(getTextOutput(result)).toContain(expectedStatePath); + }); - it("treats null exit codes as browser failures", async () => { - const cwd = createTempDir("coding-agent-browser-null-exit-"); - const profileDir = join(cwd, "profile"); - const stateDir = join(cwd, "states"); - const { operations } = createMockBrowserOperations("browser crashed", null); + it("treats null exit codes as browser failures", async () => { + const cwd = createTempDir("coding-agent-browser-null-exit-"); + const profileDir = join(cwd, "profile"); + const stateDir = join(cwd, "states"); + const { operations } = createMockBrowserOperations("browser crashed", null); - const browserTool = createBrowserTool(cwd, { - operations, - profileDir, - stateDir, - }); + const browserTool = createBrowserTool(cwd, { + operations, + profileDir, + stateDir, + }); - await expect( - browserTool.execute("browser-open-null-exit", { - action: "open", - url: "https://example.com", - }), - ).rejects.toThrow('browser crashed\n\nBrowser action "open" failed'); - }); + await expect( + browserTool.execute("browser-open-null-exit", { + action: "open", + url: "https://example.com", + }), + ).rejects.toThrow('browser crashed\n\nBrowser action "open" failed'); + }); - it("accepts browser in --tools and exposes it in default tool wiring", () => { - const parsed = parseArgs(["--tools", "browser,read"]); - expect(parsed.tools).toEqual(["browser", "read"]); + it("accepts browser in --tools and exposes it in default tool wiring", () => { + const parsed = parseArgs(["--tools", "browser,read"]); + expect(parsed.tools).toEqual(["browser", "read"]); - expect(defaultCodingToolNames).toContain("browser"); - expect(createAllTools(process.cwd()).browser.name).toBe("browser"); - }); + expect(defaultCodingToolNames).toContain("browser"); + expect(createAllTools(process.cwd()).browser.name).toBe("browser"); + }); - it("mentions browser in the default system prompt", () => { - const prompt = buildSystemPrompt(); + it("mentions browser in the default system prompt", () => { + const prompt = buildSystemPrompt(); - expect(prompt).toContain( - "- browser: Open websites, inspect pages with snapshot, click/fill/wait, take screenshots, and save/load browser state", - ); - expect(prompt).toContain( - "Use browser for website tasks. Open the page, use snapshot to inspect interactive elements, then click, fill, wait, or screenshot as needed", - ); - }); + expect(prompt).toContain( + "- browser: Open websites, inspect pages with snapshot, click/fill/wait, take screenshots, and save/load browser state", + ); + expect(prompt).toContain( + "Use browser for website tasks. Open the page, use snapshot to inspect interactive elements, then click, fill, wait, or screenshot as needed", + ); + }); });