Merge pull request #260 from getcompanion-ai/codex/deterministic-hosting

deterministic hosting
This commit is contained in:
Hari 2026-03-08 17:59:58 -04:00 committed by GitHub
commit 63e33460f6
2 changed files with 689 additions and 608 deletions

View file

@ -6,68 +6,81 @@ import { join, resolve } from "node:path";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { type Static, Type } from "@sinclair/typebox";
import { getAgentDir } from "../../config.js";
import { getShellEnv, killProcessTree, sanitizeBinaryOutput } from "../../utils/shell.js";
import {
getShellEnv,
killProcessTree,
sanitizeBinaryOutput,
} from "../../utils/shell.js";
/** Every action the browser tool accepts; the schema's `action` union is built from this list. */
const browserActions = [
  "open",
  "snapshot",
  "click",
  "fill",
  "wait",
  "screenshot",
  "state_save",
  "state_load",
  "close",
] as const;

/** Snapshot modes accepted by the `snapshot` action. */
const browserSnapshotModes = ["interactive", "full"] as const;

/** Page load states accepted by the `wait` action's `loadState` field. */
const browserLoadStates = ["load", "domcontentloaded", "networkidle"] as const;

/**
 * Executable used when no explicit command is configured.
 * `||` (not `??`) is deliberate: an empty PI_AGENT_BROWSER_COMMAND also falls
 * back to "agent-browser".
 */
const DEFAULT_BROWSER_COMMAND =
  process.env.PI_AGENT_BROWSER_COMMAND || "agent-browser";

/** Timeout, in seconds, used when the tool options do not supply one. */
const DEFAULT_BROWSER_TIMEOUT_SECONDS = 90;
/**
 * TypeBox schema describing the browser tool's input.
 *
 * Only `action` is required by the schema itself. Which of the optional
 * fields a given action needs is validated later, when the command line is
 * built.
 */
const browserSchema = Type.Object({
  action: Type.Union(
    browserActions.map((action) => Type.Literal(action)),
    { description: "Browser action to execute" },
  ),
  url: Type.Optional(
    Type.String({ description: "URL to open, or URL glob to wait for" }),
  ),
  mode: Type.Optional(
    Type.Union(
      browserSnapshotModes.map((mode) => Type.Literal(mode)),
      { description: "Snapshot mode. Defaults to interactive." },
    ),
  ),
  ref: Type.Optional(
    Type.String({
      description: "Element ref from snapshot output, such as @e2",
    }),
  ),
  value: Type.Optional(
    Type.String({ description: "Text value to fill into a field" }),
  ),
  text: Type.Optional(Type.String({ description: "Visible text to wait for" })),
  ms: Type.Optional(
    Type.Number({
      description: "Milliseconds to wait",
      minimum: 0,
    }),
  ),
  loadState: Type.Optional(
    Type.Union(
      browserLoadStates.map((state) => Type.Literal(state)),
      { description: "Page load state to wait for" },
    ),
  ),
  path: Type.Optional(
    Type.String({
      description:
        "Output path for screenshots, relative to the current working directory if not absolute",
    }),
  ),
  fullPage: Type.Optional(
    Type.Boolean({ description: "Capture a full-page screenshot" }),
  ),
  stateName: Type.Optional(
    Type.String({
      description:
        "Named browser state checkpoint stored under ~/.pi/agent/browser/states/",
    }),
  ),
});
/** Union of the string literals listed in `browserActions`. */
export type BrowserToolAction = (typeof browserActions)[number];
@ -76,433 +89,483 @@ export type BrowserLoadState = (typeof browserLoadStates)[number];
/** Static TypeScript type derived from `browserSchema`. */
export type BrowserToolInput = Static<typeof browserSchema>;
/** Metadata attached to the progress update and to the result of a browser tool call. */
export interface BrowserToolDetails {
  /** Action that was requested. */
  action: BrowserToolAction;
  /** Executable handed to the exec operation. */
  command: string;
  /** Argument list handed to the exec operation, including the `--profile` pair. */
  args: string[];
  /** Resolved browser profile directory. */
  profilePath: string;
  /** Resolved screenshot output path; only set for the `screenshot` action. */
  screenshotPath?: string;
  /** Resolved state file path; only set for `state_save` and `state_load`. */
  statePath?: string;
}
/**
 * Process-execution seam for the browser tool, so callers (and tests) can
 * substitute their own implementation for the spawn-based default.
 */
export interface BrowserOperations {
  /**
   * Runs `command` with `args` and reports its exit code.
   *
   * @param command Executable to run.
   * @param args Arguments passed to the executable.
   * @param options.cwd Working directory for the process.
   * @param options.env Environment for the process.
   * @param options.onData Receives each chunk of process output.
   * @param options.signal Optional abort signal.
   * @param options.timeout Optional timeout; the default implementation
   *   interprets it as seconds and ignores values that are not positive.
   * @returns The exit code, or `null` when none is available.
   */
  exec: (
    command: string,
    args: string[],
    options: {
      cwd: string;
      env: NodeJS.ProcessEnv;
      onData: (data: Buffer) => void;
      signal?: AbortSignal;
      timeout?: number;
    },
  ) => Promise<{ exitCode: number | null }>;
}
/**
 * Default {@link BrowserOperations} implementation backed by `spawn`.
 *
 * The child is started detached with stdin ignored; stdout and stderr chunks
 * are both forwarded to `onData`. The returned promise:
 * - resolves with the exit code when the process closes normally,
 * - rejects with `Error("aborted")` when `signal` is aborted by close time,
 * - rejects with `Error("timeout:<seconds>")` when the timeout fired,
 * - rejects with the spawn error when the child emits `error`.
 */
const defaultBrowserOperations: BrowserOperations = {
  exec: (command, args, { cwd, env, onData, signal, timeout }) => {
    return new Promise((resolvePromise, rejectPromise) => {
      const child = spawn(command, args, {
        cwd,
        detached: true,
        env,
        stdio: ["ignore", "pipe", "pipe"],
      });

      let timedOut = false;
      let timeoutHandle: NodeJS.Timeout | undefined;

      // `timeout` is in seconds; undefined, zero or negative disables it.
      if (timeout !== undefined && timeout > 0) {
        timeoutHandle = setTimeout(() => {
          timedOut = true;
          if (child.pid) {
            killProcessTree(child.pid);
          }
        }, timeout * 1000);
      }

      if (child.stdout) {
        child.stdout.on("data", onData);
      }
      if (child.stderr) {
        child.stderr.on("data", onData);
      }

      const onAbort = () => {
        if (child.pid) {
          killProcessTree(child.pid);
        }
      };

      if (signal) {
        if (signal.aborted) {
          // Already cancelled before we got here: kill right away.
          onAbort();
        } else {
          signal.addEventListener("abort", onAbort, { once: true });
        }
      }

      // Shared teardown for both terminal events.
      const cleanup = () => {
        if (timeoutHandle) clearTimeout(timeoutHandle);
        if (signal) signal.removeEventListener("abort", onAbort);
      };

      child.on("error", (error) => {
        cleanup();
        rejectPromise(error);
      });

      child.on("close", (code) => {
        cleanup();

        // Abort takes precedence over timeout when both happened.
        if (signal?.aborted) {
          rejectPromise(new Error("aborted"));
          return;
        }

        if (timedOut) {
          rejectPromise(new Error(`timeout:${timeout}`));
          return;
        }

        resolvePromise({ exitCode: code });
      });
    });
  },
};
/** Optional overrides accepted by `createBrowserTool`. */
export interface BrowserToolOptions {
  /** Process-execution implementation; defaults to the spawn-based one. */
  operations?: BrowserOperations;
  /** Executable to run; defaults to `DEFAULT_BROWSER_COMMAND`. */
  command?: string;
  /** Timeout in seconds; defaults to `DEFAULT_BROWSER_TIMEOUT_SECONDS`. */
  defaultTimeoutSeconds?: number;
  /** Profile directory; defaults to `<agentDir>/browser/profile`. Resolved against the tool's cwd. */
  profileDir?: string;
  /** Saved-state directory; defaults to `<agentDir>/browser/states`. Resolved against the tool's cwd. */
  stateDir?: string;
  /** Agent base directory; defaults to the value returned by `getAgentDir()`. */
  agentDir?: string;
}
/** Everything needed to run one browser action and report on it. */
interface BrowserCommandContext {
  /** Action being executed. */
  action: BrowserToolAction;
  /** Full argument list for the browser command. */
  args: string[];
  /** Text emitted as a progress update before the command runs. */
  statusMessage: string;
  /** Text returned when the command succeeds without producing output. */
  successMessage: string;
  /** Resolved browser profile directory. */
  profilePath: string;
  /** Resolved screenshot output path (screenshot action only). */
  screenshotPath?: string;
  /** Resolved state file path (state_save / state_load only). */
  statePath?: string;
}
/** A command context before `profilePath` has been filled in. */
type BrowserCommandContextWithoutProfile = Omit<
  BrowserCommandContext,
  "profilePath"
>;
/**
 * Resolves `inputPath` against `cwd`.
 *
 * @param cwd Base directory used for relative paths.
 * @param inputPath Path to resolve; an absolute path is returned unchanged
 *   apart from normalization.
 * @returns The absolute path.
 */
function resolveCommandPath(cwd: string, inputPath: string): string {
  return resolve(cwd, inputPath);
}
/**
 * Returns the directory that holds all browser tool data: `<agentDir>/browser`.
 *
 * @param options Tool options; `agentDir` overrides the value from `getAgentDir()`.
 */
function getBrowserRootDir(options?: BrowserToolOptions): string {
  const baseAgentDir = options?.agentDir ?? getAgentDir();
  return join(baseAgentDir, "browser");
}
/**
 * Returns the absolute browser profile directory.
 *
 * Uses `options.profileDir` when given, otherwise `<browser root>/profile`;
 * either way the result is resolved against `cwd`.
 */
function getBrowserProfilePath(
  cwd: string,
  options?: BrowserToolOptions,
): string {
  const profilePath =
    options?.profileDir ?? join(getBrowserRootDir(options), "profile");
  return resolveCommandPath(cwd, profilePath);
}
/**
 * Returns the absolute directory where named browser states are stored.
 *
 * Uses `options.stateDir` when given, otherwise `<browser root>/states`;
 * either way the result is resolved against `cwd`.
 */
function getBrowserStateDir(cwd: string, options?: BrowserToolOptions): string {
  const stateDir =
    options?.stateDir ?? join(getBrowserRootDir(options), "states");
  return resolveCommandPath(cwd, stateDir);
}
/**
 * Builds a screenshot path in the OS temp directory with a random name.
 * The file itself is not created here.
 *
 * @returns `<tmpdir>/pi-browser-screenshot-<16 hex chars>.png`
 */
function createTempScreenshotPath(): string {
  const id = randomBytes(8).toString("hex");
  return join(tmpdir(), `pi-browser-screenshot-${id}.png`);
}
/**
 * Joins the collected output chunks, decodes them as UTF-8, passes the text
 * through `sanitizeBinaryOutput`, and trims surrounding whitespace.
 */
function normalizeOutput(chunks: Buffer[]): string {
  return sanitizeBinaryOutput(Buffer.concat(chunks).toString("utf-8")).trim();
}
/**
 * Converts a user-supplied state name into a safe file stem.
 *
 * The name is trimmed, one trailing ".json" is dropped, every run of
 * characters outside `[a-zA-Z0-9._-]` becomes a single "-", and leading and
 * trailing dashes are stripped.
 *
 * @param stateName Raw name from the tool input.
 * @returns The sanitized stem (without ".json").
 * @throws Error when the name is blank or nothing usable is left after sanitizing.
 */
function sanitizeStateName(stateName: string): string {
  const trimmed = stateName.trim();
  if (trimmed.length === 0) {
    throw new Error("stateName is required for browser state actions");
  }

  const withoutJsonSuffix = trimmed.endsWith(".json")
    ? trimmed.slice(0, -".json".length)
    : trimmed;
  const sanitized = withoutJsonSuffix
    .replace(/[^a-zA-Z0-9._-]+/g, "-")
    .replace(/^-+|-+$/g, "");

  if (sanitized.length === 0) {
    throw new Error(`Invalid browser state name: "${stateName}"`);
  }

  return sanitized;
}
/**
 * Creates the profile and state directories, including missing parents.
 * Directories that already exist are left alone.
 */
function ensureBrowserDirs(profilePath: string, stateDir: string): void {
  mkdirSync(profilePath, { recursive: true });
  mkdirSync(stateDir, { recursive: true });
}
/**
 * Completes a command context with its profile path.
 *
 * Side effect: makes sure the profile and state directories exist before the
 * context is returned.
 *
 * @param profilePath Resolved profile directory, copied into the result.
 * @param stateDir Resolved state directory (only used to create it).
 * @param context The action-specific part of the context.
 */
function createBrowserCommandContext(
  profilePath: string,
  stateDir: string,
  context: BrowserCommandContextWithoutProfile,
): BrowserCommandContext {
  ensureBrowserDirs(profilePath, stateDir);
  return {
    ...context,
    profilePath,
  };
}
/**
 * Translates a `wait` input into command arguments and a status line.
 *
 * Exactly one of `ref`, `url`, `text`, `ms` or `loadState` must be present
 * (a field counts as present when it is not `undefined`, so `ms: 0` counts).
 *
 * @param input Tool input for the `wait` action.
 * @returns The `wait ...` argument list and the matching progress message.
 * @throws Error when zero or more than one wait target is supplied.
 */
function buildWaitArgs(input: BrowserToolInput): {
  args: string[];
  status: string;
} {
  const targets = [
    input.ref !== undefined ? "ref" : undefined,
    input.url !== undefined ? "url" : undefined,
    input.text !== undefined ? "text" : undefined,
    input.ms !== undefined ? "ms" : undefined,
    input.loadState !== undefined ? "loadState" : undefined,
  ].filter((target): target is string => target !== undefined);

  if (targets.length !== 1) {
    throw new Error(
      "browser wait requires exactly one of ref, url, text, ms, or loadState",
    );
  }

  if (input.ref !== undefined) {
    return { args: ["wait", input.ref], status: `Waiting for ${input.ref}...` };
  }
  if (input.url !== undefined) {
    return {
      args: ["wait", "--url", input.url],
      status: `Waiting for URL ${input.url}...`,
    };
  }
  if (input.text !== undefined) {
    return {
      args: ["wait", "--text", input.text],
      status: `Waiting for text "${input.text}"...`,
    };
  }
  if (input.ms !== undefined) {
    return {
      args: ["wait", String(input.ms)],
      status: `Waiting ${input.ms}ms...`,
    };
  }

  // Exactly one target is set and the other four were ruled out above,
  // so the remaining one has to be loadState.
  return {
    args: ["wait", "--load", input.loadState!],
    status: `Waiting for load state ${input.loadState}...`,
  };
}
/**
 * Validates a tool input and turns it into a runnable command context.
 *
 * Every argument list starts with `--profile <profilePath>`. Building a
 * context also creates the profile and state directories (through
 * `createBrowserCommandContext`); validation failures throw before that.
 *
 * @param cwd Directory against which relative paths are resolved.
 * @param input Validated tool input.
 * @param options Tool options (profile / state / agent directory overrides).
 * @returns The command context for the requested action.
 * @throws Error when a field required by the action is missing, when the
 *   wait targets are invalid, when a state name is unusable, or when
 *   `state_load` refers to a state file that does not exist.
 */
function buildBrowserCommand(
  cwd: string,
  input: BrowserToolInput,
  options?: BrowserToolOptions,
): BrowserCommandContext {
  const profilePath = getBrowserProfilePath(cwd, options);
  const stateDir = getBrowserStateDir(cwd, options);
  const baseArgs = ["--profile", profilePath];

  switch (input.action) {
    case "open": {
      if (!input.url) {
        throw new Error("browser open requires url");
      }
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "open", input.url],
        statusMessage: `Opening ${input.url}...`,
        successMessage: `Opened ${input.url}`,
      });
    }
    case "snapshot": {
      // Interactive snapshots are the default and map to the `-i` flag.
      const mode = input.mode ?? "interactive";
      const args =
        mode === "interactive"
          ? [...baseArgs, "snapshot", "-i"]
          : [...baseArgs, "snapshot"];
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args,
        statusMessage: "Capturing browser snapshot...",
        successMessage: "Captured browser snapshot",
      });
    }
    case "click": {
      if (!input.ref) {
        throw new Error("browser click requires ref");
      }
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "click", input.ref],
        statusMessage: `Clicking ${input.ref}...`,
        successMessage: `Clicked ${input.ref}`,
      });
    }
    case "fill": {
      // An empty string is a valid value (clears the field), so only
      // `undefined` counts as missing.
      if (!input.ref || input.value === undefined) {
        throw new Error("browser fill requires ref and value");
      }
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "fill", input.ref, input.value],
        statusMessage: `Filling ${input.ref}...`,
        successMessage: `Filled ${input.ref}`,
      });
    }
    case "wait": {
      const wait = buildWaitArgs(input);
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, ...wait.args],
        statusMessage: wait.status,
        successMessage: "Browser wait condition satisfied",
      });
    }
    case "screenshot": {
      // Without an explicit path the screenshot goes to a random temp file.
      const screenshotPath = input.path
        ? resolveCommandPath(cwd, input.path)
        : createTempScreenshotPath();
      const args = [...baseArgs, "screenshot"];
      if (input.fullPage) {
        args.push("--full");
      }
      args.push(screenshotPath);

      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args,
        statusMessage: "Taking browser screenshot...",
        successMessage: `Saved browser screenshot to ${screenshotPath}`,
        screenshotPath,
      });
    }
    case "state_save": {
      if (!input.stateName) {
        throw new Error("browser state_save requires stateName");
      }
      const statePath = join(
        stateDir,
        `${sanitizeStateName(input.stateName)}.json`,
      );
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "state", "save", statePath],
        statusMessage: `Saving browser state "${input.stateName}"...`,
        successMessage: `Saved browser state "${input.stateName}" to ${statePath}`,
        statePath,
      });
    }
    case "state_load": {
      if (!input.stateName) {
        throw new Error("browser state_load requires stateName");
      }
      const statePath = join(
        stateDir,
        `${sanitizeStateName(input.stateName)}.json`,
      );
      // Fail early with a clear message instead of letting the command fail.
      if (!existsSync(statePath)) {
        throw new Error(
          `Saved browser state "${input.stateName}" not found at ${statePath}`,
        );
      }
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "state", "load", statePath],
        statusMessage: `Loading browser state "${input.stateName}"...`,
        successMessage: `Loaded browser state "${input.stateName}" from ${statePath}`,
        statePath,
      });
    }
    case "close":
      return createBrowserCommandContext(profilePath, stateDir, {
        action: input.action,
        args: [...baseArgs, "close"],
        statusMessage: "Closing browser...",
        successMessage: "Closed browser",
      });
    default: {
      // Compile-time exhaustiveness check: fails to type-check if an action
      // is added to the union without a case above.
      const unsupportedAction: never = input.action;
      throw new Error(`Unsupported browser action: ${unsupportedAction}`);
    }
  }
}
/**
 * Formats the error text for a browser command that did not exit with 0.
 *
 * @param action Action that failed.
 * @param output Normalized process output; may be empty.
 * @param exitCode Exit code, or `null` when none was reported.
 * @returns The output (if any), a blank line, then a one-line summary;
 *   just the summary when there was no output.
 */
function buildBrowserErrorMessage(
  action: BrowserToolAction,
  output: string,
  exitCode: number | null,
): string {
  const base =
    exitCode === null
      ? `Browser action "${action}" failed`
      : `Browser action "${action}" exited with code ${exitCode}`;
  return output.length > 0 ? `${output}\n\n${base}` : base;
}
/**
 * Builds the multi-line help text shown when the browser executable cannot
 * be found.
 *
 * @param command The executable name that was looked up.
 */
function getMissingBrowserCommandMessage(command: string): string {
  return [
    `Browser tool could not find "${command}".`,
    "Install agent-browser so the first-class browser tool can run.",
    "Recommended setup:",
    " npm install -g agent-browser",
    " agent-browser install",
    "If Chromium lives at a custom path, set AGENT_BROWSER_EXECUTABLE_PATH.",
  ].join("\n");
}
/**
 * Creates the `browser` agent tool.
 *
 * Each call builds a command from the input, emits one progress update with
 * the status message, runs the command through the configured operations,
 * and returns the command output. When the command prints nothing, the
 * action's success message is returned instead; a `snapshot` with no output
 * is treated as an error.
 *
 * Errors thrown by `execute`:
 * - input validation errors from building the command,
 * - a non-zero (or missing) exit code, with the output included,
 * - install instructions when the executable is missing (ENOENT),
 * - `... aborted` / `... timed out after N seconds` for abort and timeout,
 * - any other error from the exec operation, rethrown unchanged.
 *
 * @param cwd Working directory for the command and for relative paths.
 * @param options Optional overrides (operations, command, timeout, directories).
 */
export function createBrowserTool(
  cwd: string,
  options?: BrowserToolOptions,
): AgentTool<typeof browserSchema> {
  const operations = options?.operations ?? defaultBrowserOperations;
  const command = options?.command ?? DEFAULT_BROWSER_COMMAND;
  const defaultTimeoutSeconds =
    options?.defaultTimeoutSeconds ?? DEFAULT_BROWSER_TIMEOUT_SECONDS;

  return {
    name: "browser",
    label: "browser",
    description:
      "Use a persistent browser for websites: open pages, inspect them with snapshot, click or fill elements, wait for changes, take screenshots, and save or load named browser state.",
    parameters: browserSchema,
    execute: async (_toolCallId, input, signal, onUpdate) => {
      // Built outside the try block so validation errors surface as-is.
      const commandContext = buildBrowserCommand(cwd, input, options);
      const details: BrowserToolDetails = {
        action: commandContext.action,
        command,
        args: commandContext.args,
        profilePath: commandContext.profilePath,
        screenshotPath: commandContext.screenshotPath,
        statePath: commandContext.statePath,
      };

      onUpdate?.({
        content: [{ type: "text", text: commandContext.statusMessage }],
        details,
      });

      const chunks: Buffer[] = [];

      try {
        const { exitCode } = await operations.exec(
          command,
          commandContext.args,
          {
            cwd,
            env: getShellEnv(),
            onData: (data) => chunks.push(data),
            signal,
            timeout: defaultTimeoutSeconds,
          },
        );

        const output = normalizeOutput(chunks);
        if (exitCode !== 0) {
          throw new Error(
            buildBrowserErrorMessage(commandContext.action, output, exitCode),
          );
        }

        // A snapshot is only useful for its text, so empty output is an error.
        if (commandContext.action === "snapshot") {
          if (output.length === 0) {
            throw new Error("Browser snapshot returned no output");
          }
          return {
            content: [{ type: "text", text: output }],
            details,
          };
        }

        const text = output.length > 0 ? output : commandContext.successMessage;
        return {
          content: [{ type: "text", text }],
          details,
        };
      } catch (error) {
        // Executable not found: replace the raw spawn error with setup help.
        if (
          error instanceof Error &&
          "code" in error &&
          error.code === "ENOENT"
        ) {
          throw new Error(getMissingBrowserCommandMessage(command));
        }
        // "aborted" and "timeout:<seconds>" are the sentinel messages used
        // by the default exec implementation.
        if (error instanceof Error && error.message === "aborted") {
          throw new Error(`Browser action "${commandContext.action}" aborted`);
        }
        if (error instanceof Error && error.message.startsWith("timeout:")) {
          const seconds = error.message.split(":")[1];
          throw new Error(
            `Browser action "${commandContext.action}" timed out after ${seconds} seconds`,
          );
        }
        throw error;
      }
    },
  };
}
/** Browser tool bound to `process.cwd()` as evaluated once, when this module loads. */
export const browserTool = createBrowserTool(process.cwd());

View file

@ -5,270 +5,288 @@ import { afterEach, describe, expect, it } from "vitest";
import { parseArgs } from "../src/cli/args.js";
import { buildSystemPrompt } from "../src/core/system-prompt.js";
import {
type BrowserOperations,
type BrowserToolDetails,
createAllTools,
createBrowserTool,
defaultCodingToolNames,
type BrowserOperations,
type BrowserToolDetails,
createAllTools,
createBrowserTool,
defaultCodingToolNames,
} from "../src/core/tools/index.js";
/** A text content block as it appears in a tool result. */
interface TextBlock {
  type: "text";
  text: string;
}
/** A tool result content block: either a text block or any other object carrying a `type` tag. */
type ToolContentBlock = TextBlock | { type: string };
/** Minimal shape of a tool result, as far as these tests inspect it. */
interface ToolResultLike {
  content: ToolContentBlock[];
  details?: unknown;
}
/** One recorded invocation of the mocked `exec` operation. */
interface BrowserExecCall {
  command: string;
  args: string[];
  cwd: string;
  env: NodeJS.ProcessEnv;
  timeout?: number;
}
/**
 * Collects the text of every `text` block in a tool result, joined by
 * newlines. Blocks of any other type are skipped.
 */
function getTextOutput(result: ToolResultLike): string {
  return result.content
    .filter((block): block is TextBlock => block.type === "text")
    .map((block) => block.text)
    .join("\n");
}
/**
 * Builds a fake `BrowserOperations` that never spawns a process.
 *
 * Every `exec` call is appended to `calls`. When `output` is non-empty it is
 * delivered once through `onData` as a UTF-8 buffer, and the call then
 * resolves with the configured `exitCode`.
 *
 * @param output Text the fake command "prints"; empty means no data events.
 * @param exitCode Exit code to resolve with.
 * @returns The recorded-call list and the operations object that fills it.
 */
function createMockBrowserOperations(
  output = "",
  exitCode: number | null = 0,
): {
  calls: BrowserExecCall[];
  operations: BrowserOperations;
} {
  const calls: BrowserExecCall[] = [];

  return {
    calls,
    operations: {
      exec: async (command, args, options) => {
        calls.push({
          command,
          args,
          cwd: options.cwd,
          env: options.env,
          timeout: options.timeout,
        });
        if (output.length > 0) {
          options.onData(Buffer.from(output, "utf-8"));
        }
        return { exitCode };
      },
    },
  };
}
describe("browser tool", () => {
const tempDirs: string[] = [];
const tempDirs: string[] = [];
afterEach(() => {
while (tempDirs.length > 0) {
const tempDir = tempDirs.pop();
if (tempDir) {
rmSync(tempDir, { recursive: true, force: true });
}
}
});
afterEach(() => {
while (tempDirs.length > 0) {
const tempDir = tempDirs.pop();
if (tempDir) {
rmSync(tempDir, { recursive: true, force: true });
}
}
});
function createTempDir(prefix: string): string {
const tempDir = mkdtempSync(join(tmpdir(), prefix));
tempDirs.push(tempDir);
return tempDir;
}
function createTempDir(prefix: string): string {
const tempDir = mkdtempSync(join(tmpdir(), prefix));
tempDirs.push(tempDir);
return tempDir;
}
it("opens pages through agent-browser with a shared profile", async () => {
const cwd = createTempDir("coding-agent-browser-open-");
const profileDir = join(cwd, "profile");
const stateDir = join(cwd, "states");
const { calls, operations } = createMockBrowserOperations();
it("opens pages through agent-browser with a shared profile", async () => {
const cwd = createTempDir("coding-agent-browser-open-");
const profileDir = join(cwd, "profile");
const stateDir = join(cwd, "states");
const { calls, operations } = createMockBrowserOperations();
const browserTool = createBrowserTool(cwd, {
operations,
command: "agent-browser-test",
profileDir,
stateDir,
});
const browserTool = createBrowserTool(cwd, {
operations,
command: "agent-browser-test",
profileDir,
stateDir,
});
const result = (await browserTool.execute("browser-open", {
action: "open",
url: "https://example.com",
})) as ToolResultLike;
const result = (await browserTool.execute("browser-open", {
action: "open",
url: "https://example.com",
})) as ToolResultLike;
expect(calls).toHaveLength(1);
expect(calls[0]).toMatchObject({
command: "agent-browser-test",
args: ["--profile", profileDir, "open", "https://example.com"],
cwd,
timeout: 90,
});
expect(getTextOutput(result)).toBe("Opened https://example.com");
expect(calls).toHaveLength(1);
expect(calls[0]).toMatchObject({
command: "agent-browser-test",
args: ["--profile", profileDir, "open", "https://example.com"],
cwd,
timeout: 90,
});
expect(getTextOutput(result)).toBe("Opened https://example.com");
const details = result.details as BrowserToolDetails | undefined;
expect(details?.profilePath).toBe(profileDir);
});
const details = result.details as BrowserToolDetails | undefined;
expect(details?.profilePath).toBe(profileDir);
});
// A snapshot request that omits `mode` must be forwarded to the CLI with the
// `-i` flag, and the text handed to the mock must show up in the tool's text
// output. The string passed to createMockBrowserOperations is presumably the
// simulated command output — confirm against the helper.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and redeclared `browserTool`/`result` in the same scope.
it("uses interactive snapshots by default and returns snapshot text", async () => {
  const cwd = createTempDir("coding-agent-browser-snapshot-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { calls, operations } = createMockBrowserOperations(
    "main [ref=@e1]\nbutton [ref=@e2] Sign in",
  );

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  const result = (await browserTool.execute("browser-snapshot", {
    action: "snapshot",
  })) as ToolResultLike;

  // Only the first recorded invocation matters: profile flag, then the
  // snapshot subcommand in interactive mode.
  expect(calls[0]?.args).toEqual(["--profile", profileDir, "snapshot", "-i"]);
  expect(getTextOutput(result)).toContain("button [ref=@e2] Sign in");
});
// `wait` needs exactly one target. Both the "no target" and the "two targets"
// requests must reject with the same message, and the mock must record zero
// invocations, i.e. validation happens before any command is issued.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and redeclared `browserTool` in the same scope.
it("validates wait targets before spawning agent-browser", async () => {
  const cwd = createTempDir("coding-agent-browser-wait-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { calls, operations } = createMockBrowserOperations();

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  // No target at all.
  await expect(
    browserTool.execute("browser-wait-missing", {
      action: "wait",
    }),
  ).rejects.toThrow(
    "browser wait requires exactly one of ref, url, text, ms, or loadState",
  );

  // Two targets at once is ambiguous and must be rejected the same way.
  await expect(
    browserTool.execute("browser-wait-ambiguous", {
      action: "wait",
      ref: "@e2",
      text: "Done",
    }),
  ).rejects.toThrow(
    "browser wait requires exactly one of ref, url, text, ms, or loadState",
  );

  expect(calls).toHaveLength(0);
});
// An empty string is still a provided `text` target: the recorded command
// must be `wait --text ""` rather than some other wait form.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other, redeclared `browserTool`, and invoked `execute` twice.
it("preserves empty string wait targets instead of falling through to loadState", async () => {
  const cwd = createTempDir("coding-agent-browser-wait-empty-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { calls, operations } = createMockBrowserOperations();

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  await browserTool.execute("browser-wait-empty-text", {
    action: "wait",
    text: "",
  });

  expect(calls[0]?.args).toEqual([
    "--profile",
    profileDir,
    "wait",
    "--text",
    "",
  ]);
});
// An `open` request without a url must reject, and neither the profile
// directory nor the state directory may exist on disk afterwards.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and redeclared `browserTool` in the same scope.
it("does not create browser directories when validation fails before command construction", async () => {
  const cwd = createTempDir("coding-agent-browser-invalid-open-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { operations } = createMockBrowserOperations();

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  await expect(
    browserTool.execute("browser-open-missing-url", {
      action: "open",
    }),
  ).rejects.toThrow("browser open requires url");

  expect(existsSync(profileDir)).toBe(false);
  expect(existsSync(stateDir)).toBe(false);
});
// Saving state under the name "my session/prod" must target
// `<stateDir>/my-session-prod.json`; that path must be passed to the CLI,
// reported in `details.statePath`, and included in the text output.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and redeclared `browserTool`, `result`,
// `expectedStatePath` and `details` in the same scope.
it("stores named state under the managed browser state directory", async () => {
  const cwd = createTempDir("coding-agent-browser-state-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { calls, operations } = createMockBrowserOperations();

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  const result = (await browserTool.execute("browser-state-save", {
    action: "state_save",
    stateName: "my session/prod",
  })) as ToolResultLike;

  const expectedStatePath = join(stateDir, "my-session-prod.json");
  expect(calls[0]?.args).toEqual([
    "--profile",
    profileDir,
    "state",
    "save",
    expectedStatePath,
  ]);

  const details = result.details as BrowserToolDetails | undefined;
  expect(details?.statePath).toBe(expectedStatePath);
  expect(getTextOutput(result)).toContain(expectedStatePath);
});
// With the mock configured with "browser crashed" and `null` (presumably the
// command output and exit code — confirm against createMockBrowserOperations),
// `open` must reject with the output followed by the failure notice.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and redeclared `browserTool` in the same scope.
it("treats null exit codes as browser failures", async () => {
  const cwd = createTempDir("coding-agent-browser-null-exit-");
  const profileDir = join(cwd, "profile");
  const stateDir = join(cwd, "states");
  const { operations } = createMockBrowserOperations("browser crashed", null);

  const browserTool = createBrowserTool(cwd, {
    operations,
    profileDir,
    stateDir,
  });

  await expect(
    browserTool.execute("browser-open-null-exit", {
      action: "open",
      url: "https://example.com",
    }),
  ).rejects.toThrow('browser crashed\n\nBrowser action "open" failed');
});
// Checks three wiring points: `--tools browser,read` parses to both names,
// "browser" is listed in defaultCodingToolNames, and createAllTools exposes a
// `browser` entry whose name is "browser".
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and repeated every assertion.
it("accepts browser in --tools and exposes it in default tool wiring", () => {
  const parsed = parseArgs(["--tools", "browser,read"]);
  expect(parsed.tools).toEqual(["browser", "read"]);

  expect(defaultCodingToolNames).toContain("browser");
  expect(createAllTools(process.cwd()).browser.name).toBe("browser");
});
// The prompt built with no arguments must contain both the browser tool's
// one-line description and the usage guideline for website tasks.
//
// This test previously appeared twice, interleaved, which nested one `it`
// inside the other and repeated every assertion.
it("mentions browser in the default system prompt", () => {
  const prompt = buildSystemPrompt();

  expect(prompt).toContain(
    "- browser: Open websites, inspect pages with snapshot, click/fill/wait, take screenshots, and save/load browser state",
  );
  expect(prompt).toContain(
    "Use browser for website tasks. Open the page, use snapshot to inspect interactive elements, then click, fill, wait, or screenshot as needed",
  );
});
});