Merge pull request #260 from getcompanion-ai/codex/deterministic-hosting

deterministic hosting
This commit is contained in:
Hari 2026-03-08 17:59:58 -04:00 committed by GitHub
commit 63e33460f6
2 changed files with 689 additions and 608 deletions

View file

@ -6,68 +6,81 @@ import { join, resolve } from "node:path";
import type { AgentTool } from "@mariozechner/pi-agent-core"; import type { AgentTool } from "@mariozechner/pi-agent-core";
import { type Static, Type } from "@sinclair/typebox"; import { type Static, Type } from "@sinclair/typebox";
import { getAgentDir } from "../../config.js"; import { getAgentDir } from "../../config.js";
import { getShellEnv, killProcessTree, sanitizeBinaryOutput } from "../../utils/shell.js"; import {
getShellEnv,
killProcessTree,
sanitizeBinaryOutput,
} from "../../utils/shell.js";
const browserActions = [ const browserActions = [
"open", "open",
"snapshot", "snapshot",
"click", "click",
"fill", "fill",
"wait", "wait",
"screenshot", "screenshot",
"state_save", "state_save",
"state_load", "state_load",
"close", "close",
] as const; ] as const;
const browserSnapshotModes = ["interactive", "full"] as const; const browserSnapshotModes = ["interactive", "full"] as const;
const browserLoadStates = ["load", "domcontentloaded", "networkidle"] as const; const browserLoadStates = ["load", "domcontentloaded", "networkidle"] as const;
const DEFAULT_BROWSER_COMMAND = process.env.PI_AGENT_BROWSER_COMMAND || "agent-browser"; const DEFAULT_BROWSER_COMMAND =
process.env.PI_AGENT_BROWSER_COMMAND || "agent-browser";
const DEFAULT_BROWSER_TIMEOUT_SECONDS = 90; const DEFAULT_BROWSER_TIMEOUT_SECONDS = 90;
const browserSchema = Type.Object({ const browserSchema = Type.Object({
action: Type.Union( action: Type.Union(
browserActions.map((action) => Type.Literal(action)), browserActions.map((action) => Type.Literal(action)),
{ description: "Browser action to execute" }, { description: "Browser action to execute" },
), ),
url: Type.Optional(Type.String({ description: "URL to open, or URL glob to wait for" })), url: Type.Optional(
mode: Type.Optional( Type.String({ description: "URL to open, or URL glob to wait for" }),
Type.Union( ),
browserSnapshotModes.map((mode) => Type.Literal(mode)), mode: Type.Optional(
{ description: "Snapshot mode. Defaults to interactive." }, Type.Union(
), browserSnapshotModes.map((mode) => Type.Literal(mode)),
), { description: "Snapshot mode. Defaults to interactive." },
ref: Type.Optional( ),
Type.String({ ),
description: "Element ref from snapshot output, such as @e2", ref: Type.Optional(
}), Type.String({
), description: "Element ref from snapshot output, such as @e2",
value: Type.Optional(Type.String({ description: "Text value to fill into a field" })), }),
text: Type.Optional(Type.String({ description: "Visible text to wait for" })), ),
ms: Type.Optional( value: Type.Optional(
Type.Number({ Type.String({ description: "Text value to fill into a field" }),
description: "Milliseconds to wait", ),
minimum: 0, text: Type.Optional(Type.String({ description: "Visible text to wait for" })),
}), ms: Type.Optional(
), Type.Number({
loadState: Type.Optional( description: "Milliseconds to wait",
Type.Union( minimum: 0,
browserLoadStates.map((state) => Type.Literal(state)), }),
{ description: "Page load state to wait for" }, ),
), loadState: Type.Optional(
), Type.Union(
path: Type.Optional( browserLoadStates.map((state) => Type.Literal(state)),
Type.String({ { description: "Page load state to wait for" },
description: "Output path for screenshots, relative to the current working directory if not absolute", ),
}), ),
), path: Type.Optional(
fullPage: Type.Optional(Type.Boolean({ description: "Capture a full-page screenshot" })), Type.String({
stateName: Type.Optional( description:
Type.String({ "Output path for screenshots, relative to the current working directory if not absolute",
description: "Named browser state checkpoint stored under ~/.pi/agent/browser/states/", }),
}), ),
), fullPage: Type.Optional(
Type.Boolean({ description: "Capture a full-page screenshot" }),
),
stateName: Type.Optional(
Type.String({
description:
"Named browser state checkpoint stored under ~/.pi/agent/browser/states/",
}),
),
}); });
export type BrowserToolAction = (typeof browserActions)[number]; export type BrowserToolAction = (typeof browserActions)[number];
@ -76,433 +89,483 @@ export type BrowserLoadState = (typeof browserLoadStates)[number];
export type BrowserToolInput = Static<typeof browserSchema>; export type BrowserToolInput = Static<typeof browserSchema>;
export interface BrowserToolDetails { export interface BrowserToolDetails {
action: BrowserToolAction; action: BrowserToolAction;
command: string; command: string;
args: string[]; args: string[];
profilePath: string; profilePath: string;
screenshotPath?: string; screenshotPath?: string;
statePath?: string; statePath?: string;
} }
export interface BrowserOperations { export interface BrowserOperations {
exec: ( exec: (
command: string, command: string,
args: string[], args: string[],
options: { options: {
cwd: string; cwd: string;
env: NodeJS.ProcessEnv; env: NodeJS.ProcessEnv;
onData: (data: Buffer) => void; onData: (data: Buffer) => void;
signal?: AbortSignal; signal?: AbortSignal;
timeout?: number; timeout?: number;
}, },
) => Promise<{ exitCode: number | null }>; ) => Promise<{ exitCode: number | null }>;
} }
const defaultBrowserOperations: BrowserOperations = { const defaultBrowserOperations: BrowserOperations = {
exec: (command, args, { cwd, env, onData, signal, timeout }) => { exec: (command, args, { cwd, env, onData, signal, timeout }) => {
return new Promise((resolvePromise, rejectPromise) => { return new Promise((resolvePromise, rejectPromise) => {
const child = spawn(command, args, { const child = spawn(command, args, {
cwd, cwd,
detached: true, detached: true,
env, env,
stdio: ["ignore", "pipe", "pipe"], stdio: ["ignore", "pipe", "pipe"],
}); });
let timedOut = false; let timedOut = false;
let timeoutHandle: NodeJS.Timeout | undefined; let timeoutHandle: NodeJS.Timeout | undefined;
if (timeout !== undefined && timeout > 0) { if (timeout !== undefined && timeout > 0) {
timeoutHandle = setTimeout(() => { timeoutHandle = setTimeout(() => {
timedOut = true; timedOut = true;
if (child.pid) { if (child.pid) {
killProcessTree(child.pid); killProcessTree(child.pid);
} }
}, timeout * 1000); }, timeout * 1000);
} }
if (child.stdout) { if (child.stdout) {
child.stdout.on("data", onData); child.stdout.on("data", onData);
} }
if (child.stderr) { if (child.stderr) {
child.stderr.on("data", onData); child.stderr.on("data", onData);
} }
const onAbort = () => { const onAbort = () => {
if (child.pid) { if (child.pid) {
killProcessTree(child.pid); killProcessTree(child.pid);
} }
}; };
if (signal) { if (signal) {
if (signal.aborted) { if (signal.aborted) {
onAbort(); onAbort();
} else { } else {
signal.addEventListener("abort", onAbort, { once: true }); signal.addEventListener("abort", onAbort, { once: true });
} }
} }
child.on("error", (error) => { child.on("error", (error) => {
if (timeoutHandle) clearTimeout(timeoutHandle); if (timeoutHandle) clearTimeout(timeoutHandle);
if (signal) signal.removeEventListener("abort", onAbort); if (signal) signal.removeEventListener("abort", onAbort);
rejectPromise(error); rejectPromise(error);
}); });
child.on("close", (code) => { child.on("close", (code) => {
if (timeoutHandle) clearTimeout(timeoutHandle); if (timeoutHandle) clearTimeout(timeoutHandle);
if (signal) signal.removeEventListener("abort", onAbort); if (signal) signal.removeEventListener("abort", onAbort);
if (signal?.aborted) { if (signal?.aborted) {
rejectPromise(new Error("aborted")); rejectPromise(new Error("aborted"));
return; return;
} }
if (timedOut) { if (timedOut) {
rejectPromise(new Error(`timeout:${timeout}`)); rejectPromise(new Error(`timeout:${timeout}`));
return; return;
} }
resolvePromise({ exitCode: code }); resolvePromise({ exitCode: code });
}); });
}); });
}, },
}; };
export interface BrowserToolOptions { export interface BrowserToolOptions {
operations?: BrowserOperations; operations?: BrowserOperations;
command?: string; command?: string;
defaultTimeoutSeconds?: number; defaultTimeoutSeconds?: number;
profileDir?: string; profileDir?: string;
stateDir?: string; stateDir?: string;
agentDir?: string; agentDir?: string;
} }
interface BrowserCommandContext { interface BrowserCommandContext {
action: BrowserToolAction; action: BrowserToolAction;
args: string[]; args: string[];
statusMessage: string; statusMessage: string;
successMessage: string; successMessage: string;
profilePath: string; profilePath: string;
screenshotPath?: string; screenshotPath?: string;
statePath?: string; statePath?: string;
} }
type BrowserCommandContextWithoutProfile = Omit<BrowserCommandContext, "profilePath">; type BrowserCommandContextWithoutProfile = Omit<
BrowserCommandContext,
"profilePath"
>;
function resolveCommandPath(cwd: string, inputPath: string): string { function resolveCommandPath(cwd: string, inputPath: string): string {
return resolve(cwd, inputPath); return resolve(cwd, inputPath);
} }
function getBrowserRootDir(options?: BrowserToolOptions): string { function getBrowserRootDir(options?: BrowserToolOptions): string {
const baseAgentDir = options?.agentDir ?? getAgentDir(); const baseAgentDir = options?.agentDir ?? getAgentDir();
return join(baseAgentDir, "browser"); return join(baseAgentDir, "browser");
} }
function getBrowserProfilePath(cwd: string, options?: BrowserToolOptions): string { function getBrowserProfilePath(
const profilePath = options?.profileDir ?? join(getBrowserRootDir(options), "profile"); cwd: string,
return resolveCommandPath(cwd, profilePath); options?: BrowserToolOptions,
): string {
const profilePath =
options?.profileDir ?? join(getBrowserRootDir(options), "profile");
return resolveCommandPath(cwd, profilePath);
} }
function getBrowserStateDir(cwd: string, options?: BrowserToolOptions): string { function getBrowserStateDir(cwd: string, options?: BrowserToolOptions): string {
const stateDir = options?.stateDir ?? join(getBrowserRootDir(options), "states"); const stateDir =
return resolveCommandPath(cwd, stateDir); options?.stateDir ?? join(getBrowserRootDir(options), "states");
return resolveCommandPath(cwd, stateDir);
} }
function createTempScreenshotPath(): string { function createTempScreenshotPath(): string {
const id = randomBytes(8).toString("hex"); const id = randomBytes(8).toString("hex");
return join(tmpdir(), `pi-browser-screenshot-${id}.png`); return join(tmpdir(), `pi-browser-screenshot-${id}.png`);
} }
function normalizeOutput(chunks: Buffer[]): string { function normalizeOutput(chunks: Buffer[]): string {
return sanitizeBinaryOutput(Buffer.concat(chunks).toString("utf-8")).trim(); return sanitizeBinaryOutput(Buffer.concat(chunks).toString("utf-8")).trim();
} }
function sanitizeStateName(stateName: string): string { function sanitizeStateName(stateName: string): string {
const trimmed = stateName.trim(); const trimmed = stateName.trim();
if (trimmed.length === 0) { if (trimmed.length === 0) {
throw new Error("stateName is required for browser state actions"); throw new Error("stateName is required for browser state actions");
} }
const withoutJsonSuffix = trimmed.endsWith(".json") ? trimmed.slice(0, -".json".length) : trimmed; const withoutJsonSuffix = trimmed.endsWith(".json")
const sanitized = withoutJsonSuffix.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, ""); ? trimmed.slice(0, -".json".length)
: trimmed;
const sanitized = withoutJsonSuffix
.replace(/[^a-zA-Z0-9._-]+/g, "-")
.replace(/^-+|-+$/g, "");
if (sanitized.length === 0) { if (sanitized.length === 0) {
throw new Error(`Invalid browser state name: "${stateName}"`); throw new Error(`Invalid browser state name: "${stateName}"`);
} }
return sanitized; return sanitized;
} }
function ensureBrowserDirs(profilePath: string, stateDir: string): void { function ensureBrowserDirs(profilePath: string, stateDir: string): void {
mkdirSync(profilePath, { recursive: true }); mkdirSync(profilePath, { recursive: true });
mkdirSync(stateDir, { recursive: true }); mkdirSync(stateDir, { recursive: true });
} }
function createBrowserCommandContext( function createBrowserCommandContext(
profilePath: string, profilePath: string,
stateDir: string, stateDir: string,
context: BrowserCommandContextWithoutProfile, context: BrowserCommandContextWithoutProfile,
): BrowserCommandContext { ): BrowserCommandContext {
ensureBrowserDirs(profilePath, stateDir); ensureBrowserDirs(profilePath, stateDir);
return { return {
...context, ...context,
profilePath, profilePath,
}; };
} }
function buildWaitArgs(input: BrowserToolInput): { args: string[]; status: string } { function buildWaitArgs(input: BrowserToolInput): {
const targets = [ args: string[];
input.ref !== undefined ? "ref" : undefined, status: string;
input.url !== undefined ? "url" : undefined, } {
input.text !== undefined ? "text" : undefined, const targets = [
input.ms !== undefined ? "ms" : undefined, input.ref !== undefined ? "ref" : undefined,
input.loadState !== undefined ? "loadState" : undefined, input.url !== undefined ? "url" : undefined,
].filter((target): target is string => target !== undefined); input.text !== undefined ? "text" : undefined,
input.ms !== undefined ? "ms" : undefined,
input.loadState !== undefined ? "loadState" : undefined,
].filter((target): target is string => target !== undefined);
if (targets.length !== 1) { if (targets.length !== 1) {
throw new Error("browser wait requires exactly one of ref, url, text, ms, or loadState"); throw new Error(
} "browser wait requires exactly one of ref, url, text, ms, or loadState",
);
}
if (input.ref !== undefined) { if (input.ref !== undefined) {
return { args: ["wait", input.ref], status: `Waiting for ${input.ref}...` }; return { args: ["wait", input.ref], status: `Waiting for ${input.ref}...` };
} }
if (input.url !== undefined) { if (input.url !== undefined) {
return { return {
args: ["wait", "--url", input.url], args: ["wait", "--url", input.url],
status: `Waiting for URL ${input.url}...`, status: `Waiting for URL ${input.url}...`,
}; };
} }
if (input.text !== undefined) { if (input.text !== undefined) {
return { return {
args: ["wait", "--text", input.text], args: ["wait", "--text", input.text],
status: `Waiting for text "${input.text}"...`, status: `Waiting for text "${input.text}"...`,
}; };
} }
if (input.ms !== undefined) { if (input.ms !== undefined) {
return { return {
args: ["wait", String(input.ms)], args: ["wait", String(input.ms)],
status: `Waiting ${input.ms}ms...`, status: `Waiting ${input.ms}ms...`,
}; };
} }
return { return {
args: ["wait", "--load", input.loadState!], args: ["wait", "--load", input.loadState!],
status: `Waiting for load state ${input.loadState}...`, status: `Waiting for load state ${input.loadState}...`,
}; };
} }
function buildBrowserCommand( function buildBrowserCommand(
cwd: string, cwd: string,
input: BrowserToolInput, input: BrowserToolInput,
options?: BrowserToolOptions, options?: BrowserToolOptions,
): BrowserCommandContext { ): BrowserCommandContext {
const profilePath = getBrowserProfilePath(cwd, options); const profilePath = getBrowserProfilePath(cwd, options);
const stateDir = getBrowserStateDir(cwd, options); const stateDir = getBrowserStateDir(cwd, options);
const baseArgs = ["--profile", profilePath]; const baseArgs = ["--profile", profilePath];
switch (input.action) { switch (input.action) {
case "open": { case "open": {
if (!input.url) { if (!input.url) {
throw new Error("browser open requires url"); throw new Error("browser open requires url");
} }
return createBrowserCommandContext(profilePath, stateDir, { return createBrowserCommandContext(profilePath, stateDir, {
action: input.action, action: input.action,
args: [...baseArgs, "open", input.url], args: [...baseArgs, "open", input.url],
statusMessage: `Opening ${input.url}...`, statusMessage: `Opening ${input.url}...`,
successMessage: `Opened ${input.url}`, successMessage: `Opened ${input.url}`,
}); });
} }
case "snapshot": { case "snapshot": {
const mode = input.mode ?? "interactive"; const mode = input.mode ?? "interactive";
const args = mode === "interactive" ? [...baseArgs, "snapshot", "-i"] : [...baseArgs, "snapshot"]; const args =
return createBrowserCommandContext(profilePath, stateDir, { mode === "interactive"
action: input.action, ? [...baseArgs, "snapshot", "-i"]
args, : [...baseArgs, "snapshot"];
statusMessage: "Capturing browser snapshot...", return createBrowserCommandContext(profilePath, stateDir, {
successMessage: "Captured browser snapshot", action: input.action,
}); args,
} statusMessage: "Capturing browser snapshot...",
case "click": { successMessage: "Captured browser snapshot",
if (!input.ref) { });
throw new Error("browser click requires ref"); }
} case "click": {
return createBrowserCommandContext(profilePath, stateDir, { if (!input.ref) {
action: input.action, throw new Error("browser click requires ref");
args: [...baseArgs, "click", input.ref], }
statusMessage: `Clicking ${input.ref}...`, return createBrowserCommandContext(profilePath, stateDir, {
successMessage: `Clicked ${input.ref}`, action: input.action,
}); args: [...baseArgs, "click", input.ref],
} statusMessage: `Clicking ${input.ref}...`,
case "fill": { successMessage: `Clicked ${input.ref}`,
if (!input.ref || input.value === undefined) { });
throw new Error("browser fill requires ref and value"); }
} case "fill": {
return createBrowserCommandContext(profilePath, stateDir, { if (!input.ref || input.value === undefined) {
action: input.action, throw new Error("browser fill requires ref and value");
args: [...baseArgs, "fill", input.ref, input.value], }
statusMessage: `Filling ${input.ref}...`, return createBrowserCommandContext(profilePath, stateDir, {
successMessage: `Filled ${input.ref}`, action: input.action,
}); args: [...baseArgs, "fill", input.ref, input.value],
} statusMessage: `Filling ${input.ref}...`,
case "wait": { successMessage: `Filled ${input.ref}`,
const wait = buildWaitArgs(input); });
return createBrowserCommandContext(profilePath, stateDir, { }
action: input.action, case "wait": {
args: [...baseArgs, ...wait.args], const wait = buildWaitArgs(input);
statusMessage: wait.status, return createBrowserCommandContext(profilePath, stateDir, {
successMessage: "Browser wait condition satisfied", action: input.action,
}); args: [...baseArgs, ...wait.args],
} statusMessage: wait.status,
case "screenshot": { successMessage: "Browser wait condition satisfied",
const screenshotPath = input.path ? resolveCommandPath(cwd, input.path) : createTempScreenshotPath(); });
const args = [...baseArgs, "screenshot"]; }
if (input.fullPage) { case "screenshot": {
args.push("--full"); const screenshotPath = input.path
} ? resolveCommandPath(cwd, input.path)
args.push(screenshotPath); : createTempScreenshotPath();
const args = [...baseArgs, "screenshot"];
if (input.fullPage) {
args.push("--full");
}
args.push(screenshotPath);
return createBrowserCommandContext(profilePath, stateDir, { return createBrowserCommandContext(profilePath, stateDir, {
action: input.action, action: input.action,
args, args,
statusMessage: "Taking browser screenshot...", statusMessage: "Taking browser screenshot...",
successMessage: `Saved browser screenshot to ${screenshotPath}`, successMessage: `Saved browser screenshot to ${screenshotPath}`,
screenshotPath, screenshotPath,
}); });
} }
case "state_save": { case "state_save": {
if (!input.stateName) { if (!input.stateName) {
throw new Error("browser state_save requires stateName"); throw new Error("browser state_save requires stateName");
} }
const statePath = join(stateDir, `${sanitizeStateName(input.stateName)}.json`); const statePath = join(
return createBrowserCommandContext(profilePath, stateDir, { stateDir,
action: input.action, `${sanitizeStateName(input.stateName)}.json`,
args: [...baseArgs, "state", "save", statePath], );
statusMessage: `Saving browser state "${input.stateName}"...`, return createBrowserCommandContext(profilePath, stateDir, {
successMessage: `Saved browser state "${input.stateName}" to ${statePath}`, action: input.action,
statePath, args: [...baseArgs, "state", "save", statePath],
}); statusMessage: `Saving browser state "${input.stateName}"...`,
} successMessage: `Saved browser state "${input.stateName}" to ${statePath}`,
case "state_load": { statePath,
if (!input.stateName) { });
throw new Error("browser state_load requires stateName"); }
} case "state_load": {
const statePath = join(stateDir, `${sanitizeStateName(input.stateName)}.json`); if (!input.stateName) {
if (!existsSync(statePath)) { throw new Error("browser state_load requires stateName");
throw new Error(`Saved browser state "${input.stateName}" not found at ${statePath}`); }
} const statePath = join(
return createBrowserCommandContext(profilePath, stateDir, { stateDir,
action: input.action, `${sanitizeStateName(input.stateName)}.json`,
args: [...baseArgs, "state", "load", statePath], );
statusMessage: `Loading browser state "${input.stateName}"...`, if (!existsSync(statePath)) {
successMessage: `Loaded browser state "${input.stateName}" from ${statePath}`, throw new Error(
statePath, `Saved browser state "${input.stateName}" not found at ${statePath}`,
}); );
} }
case "close": return createBrowserCommandContext(profilePath, stateDir, {
return createBrowserCommandContext(profilePath, stateDir, { action: input.action,
action: input.action, args: [...baseArgs, "state", "load", statePath],
args: [...baseArgs, "close"], statusMessage: `Loading browser state "${input.stateName}"...`,
statusMessage: "Closing browser...", successMessage: `Loaded browser state "${input.stateName}" from ${statePath}`,
successMessage: "Closed browser", statePath,
}); });
default: { }
const unsupportedAction: never = input.action; case "close":
throw new Error(`Unsupported browser action: ${unsupportedAction}`); return createBrowserCommandContext(profilePath, stateDir, {
} action: input.action,
} args: [...baseArgs, "close"],
statusMessage: "Closing browser...",
successMessage: "Closed browser",
});
default: {
const unsupportedAction: never = input.action;
throw new Error(`Unsupported browser action: ${unsupportedAction}`);
}
}
} }
function buildBrowserErrorMessage(action: BrowserToolAction, output: string, exitCode: number | null): string { function buildBrowserErrorMessage(
const base = action: BrowserToolAction,
exitCode === null output: string,
? `Browser action "${action}" failed` exitCode: number | null,
: `Browser action "${action}" exited with code ${exitCode}`; ): string {
return output.length > 0 ? `${output}\n\n${base}` : base; const base =
exitCode === null
? `Browser action "${action}" failed`
: `Browser action "${action}" exited with code ${exitCode}`;
return output.length > 0 ? `${output}\n\n${base}` : base;
} }
function getMissingBrowserCommandMessage(command: string): string { function getMissingBrowserCommandMessage(command: string): string {
return [ return [
`Browser tool could not find "${command}".`, `Browser tool could not find "${command}".`,
"Install agent-browser so the first-class browser tool can run.", "Install agent-browser so the first-class browser tool can run.",
"Recommended setup:", "Recommended setup:",
" npm install -g agent-browser", " npm install -g agent-browser",
" agent-browser install", " agent-browser install",
"If Chromium lives at a custom path, set AGENT_BROWSER_EXECUTABLE_PATH.", "If Chromium lives at a custom path, set AGENT_BROWSER_EXECUTABLE_PATH.",
].join("\n"); ].join("\n");
} }
export function createBrowserTool(cwd: string, options?: BrowserToolOptions): AgentTool<typeof browserSchema> { export function createBrowserTool(
const operations = options?.operations ?? defaultBrowserOperations; cwd: string,
const command = options?.command ?? DEFAULT_BROWSER_COMMAND; options?: BrowserToolOptions,
const defaultTimeoutSeconds = options?.defaultTimeoutSeconds ?? DEFAULT_BROWSER_TIMEOUT_SECONDS; ): AgentTool<typeof browserSchema> {
const operations = options?.operations ?? defaultBrowserOperations;
const command = options?.command ?? DEFAULT_BROWSER_COMMAND;
const defaultTimeoutSeconds =
options?.defaultTimeoutSeconds ?? DEFAULT_BROWSER_TIMEOUT_SECONDS;
return { return {
name: "browser", name: "browser",
label: "browser", label: "browser",
description: description:
"Use a persistent browser for websites: open pages, inspect them with snapshot, click or fill elements, wait for changes, take screenshots, and save or load named browser state.", "Use a persistent browser for websites: open pages, inspect them with snapshot, click or fill elements, wait for changes, take screenshots, and save or load named browser state.",
parameters: browserSchema, parameters: browserSchema,
execute: async (_toolCallId, input, signal, onUpdate) => { execute: async (_toolCallId, input, signal, onUpdate) => {
const commandContext = buildBrowserCommand(cwd, input, options); const commandContext = buildBrowserCommand(cwd, input, options);
const details: BrowserToolDetails = { const details: BrowserToolDetails = {
action: commandContext.action, action: commandContext.action,
command, command,
args: commandContext.args, args: commandContext.args,
profilePath: commandContext.profilePath, profilePath: commandContext.profilePath,
screenshotPath: commandContext.screenshotPath, screenshotPath: commandContext.screenshotPath,
statePath: commandContext.statePath, statePath: commandContext.statePath,
}; };
onUpdate?.({ onUpdate?.({
content: [{ type: "text", text: commandContext.statusMessage }], content: [{ type: "text", text: commandContext.statusMessage }],
details, details,
}); });
const chunks: Buffer[] = []; const chunks: Buffer[] = [];
try { try {
const { exitCode } = await operations.exec(command, commandContext.args, { const { exitCode } = await operations.exec(
cwd, command,
env: getShellEnv(), commandContext.args,
onData: (data) => chunks.push(data), {
signal, cwd,
timeout: defaultTimeoutSeconds, env: getShellEnv(),
}); onData: (data) => chunks.push(data),
signal,
timeout: defaultTimeoutSeconds,
},
);
const output = normalizeOutput(chunks); const output = normalizeOutput(chunks);
if (exitCode !== 0) { if (exitCode !== 0) {
throw new Error(buildBrowserErrorMessage(commandContext.action, output, exitCode)); throw new Error(
} buildBrowserErrorMessage(commandContext.action, output, exitCode),
);
}
if (commandContext.action === "snapshot") { if (commandContext.action === "snapshot") {
if (output.length === 0) { if (output.length === 0) {
throw new Error("Browser snapshot returned no output"); throw new Error("Browser snapshot returned no output");
} }
return { return {
content: [{ type: "text", text: output }], content: [{ type: "text", text: output }],
details, details,
}; };
} }
const text = output.length > 0 ? output : commandContext.successMessage; const text = output.length > 0 ? output : commandContext.successMessage;
return { return {
content: [{ type: "text", text }], content: [{ type: "text", text }],
details, details,
}; };
} catch (error) { } catch (error) {
if (error instanceof Error && "code" in error && error.code === "ENOENT") { if (
throw new Error(getMissingBrowserCommandMessage(command)); error instanceof Error &&
} "code" in error &&
if (error instanceof Error && error.message === "aborted") { error.code === "ENOENT"
throw new Error(`Browser action "${commandContext.action}" aborted`); ) {
} throw new Error(getMissingBrowserCommandMessage(command));
if (error instanceof Error && error.message.startsWith("timeout:")) { }
const seconds = error.message.split(":")[1]; if (error instanceof Error && error.message === "aborted") {
throw new Error(`Browser action "${commandContext.action}" timed out after ${seconds} seconds`); throw new Error(`Browser action "${commandContext.action}" aborted`);
} }
throw error; if (error instanceof Error && error.message.startsWith("timeout:")) {
} const seconds = error.message.split(":")[1];
}, throw new Error(
}; `Browser action "${commandContext.action}" timed out after ${seconds} seconds`,
);
}
throw error;
}
},
};
} }
export const browserTool = createBrowserTool(process.cwd()); export const browserTool = createBrowserTool(process.cwd());

View file

@ -5,270 +5,288 @@ import { afterEach, describe, expect, it } from "vitest";
import { parseArgs } from "../src/cli/args.js"; import { parseArgs } from "../src/cli/args.js";
import { buildSystemPrompt } from "../src/core/system-prompt.js"; import { buildSystemPrompt } from "../src/core/system-prompt.js";
import { import {
type BrowserOperations, type BrowserOperations,
type BrowserToolDetails, type BrowserToolDetails,
createAllTools, createAllTools,
createBrowserTool, createBrowserTool,
defaultCodingToolNames, defaultCodingToolNames,
} from "../src/core/tools/index.js"; } from "../src/core/tools/index.js";
interface TextBlock { interface TextBlock {
type: "text"; type: "text";
text: string; text: string;
} }
type ToolContentBlock = TextBlock | { type: string }; type ToolContentBlock = TextBlock | { type: string };
interface ToolResultLike { interface ToolResultLike {
content: ToolContentBlock[]; content: ToolContentBlock[];
details?: unknown; details?: unknown;
} }
interface BrowserExecCall { interface BrowserExecCall {
command: string; command: string;
args: string[]; args: string[];
cwd: string; cwd: string;
env: NodeJS.ProcessEnv; env: NodeJS.ProcessEnv;
timeout?: number; timeout?: number;
} }
function getTextOutput(result: ToolResultLike): string { function getTextOutput(result: ToolResultLike): string {
return result.content return result.content
.filter((block): block is TextBlock => block.type === "text") .filter((block): block is TextBlock => block.type === "text")
.map((block) => block.text) .map((block) => block.text)
.join("\n"); .join("\n");
} }
function createMockBrowserOperations( function createMockBrowserOperations(
output = "", output = "",
exitCode: number | null = 0, exitCode: number | null = 0,
): { ): {
calls: BrowserExecCall[]; calls: BrowserExecCall[];
operations: BrowserOperations; operations: BrowserOperations;
} { } {
const calls: BrowserExecCall[] = []; const calls: BrowserExecCall[] = [];
return { return {
calls, calls,
operations: { operations: {
exec: async (command, args, options) => { exec: async (command, args, options) => {
calls.push({ calls.push({
command, command,
args, args,
cwd: options.cwd, cwd: options.cwd,
env: options.env, env: options.env,
timeout: options.timeout, timeout: options.timeout,
}); });
if (output.length > 0) { if (output.length > 0) {
options.onData(Buffer.from(output, "utf-8")); options.onData(Buffer.from(output, "utf-8"));
} }
return { exitCode }; return { exitCode };
}, },
}, },
}; };
} }
/**
 * Test suite for the browser tool.
 *
 * Every test builds the tool with `createBrowserTool` and injects the mock
 * operations from `createMockBrowserOperations`, whose `exec` only records the
 * call, replays canned output, and returns a canned exit code. The assertions
 * therefore check the command line the tool would run, not a real browser.
 */
describe("browser tool", () => {
  // Temp directories handed out during the current test; emptied by afterEach.
  const tempDirs: string[] = [];

  afterEach(() => {
    while (tempDirs.length > 0) {
      const tempDir = tempDirs.pop();
      if (tempDir) {
        // force: true keeps cleanup from failing if the directory is already gone.
        rmSync(tempDir, { recursive: true, force: true });
      }
    }
  });

  /**
   * Creates a unique directory under the OS temp dir and registers it for
   * removal after the test.
   *
   * @param prefix - Name prefix passed to `mkdtempSync`.
   * @returns The path of the newly created directory.
   */
  function createTempDir(prefix: string): string {
    const tempDir = mkdtempSync(join(tmpdir(), prefix));
    tempDirs.push(tempDir);
    return tempDir;
  }

  it("opens pages through agent-browser with a shared profile", async () => {
    const cwd = createTempDir("coding-agent-browser-open-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { calls, operations } = createMockBrowserOperations();

    const browserTool = createBrowserTool(cwd, {
      operations,
      command: "agent-browser-test",
      profileDir,
      stateDir,
    });

    const result = (await browserTool.execute("browser-open", {
      action: "open",
      url: "https://example.com",
    })) as ToolResultLike;

    expect(calls).toHaveLength(1);
    // No timeout is passed above, so the tool's default of 90 is expected.
    expect(calls[0]).toMatchObject({
      command: "agent-browser-test",
      args: ["--profile", profileDir, "open", "https://example.com"],
      cwd,
      timeout: 90,
    });
    expect(getTextOutput(result)).toBe("Opened https://example.com");

    const details = result.details as BrowserToolDetails | undefined;
    expect(details?.profilePath).toBe(profileDir);
  });

  it("uses interactive snapshots by default and returns snapshot text", async () => {
    const cwd = createTempDir("coding-agent-browser-snapshot-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { calls, operations } = createMockBrowserOperations(
      "main [ref=@e1]\nbutton [ref=@e2] Sign in",
    );

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    const result = (await browserTool.execute("browser-snapshot", {
      action: "snapshot",
    })) as ToolResultLike;

    // No snapshot mode is given, so the "-i" flag is expected.
    expect(calls[0]?.args).toEqual(["--profile", profileDir, "snapshot", "-i"]);
    expect(getTextOutput(result)).toContain("button [ref=@e2] Sign in");
  });

  it("validates wait targets before spawning agent-browser", async () => {
    const cwd = createTempDir("coding-agent-browser-wait-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { calls, operations } = createMockBrowserOperations();

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    // No wait target at all.
    await expect(
      browserTool.execute("browser-wait-missing", {
        action: "wait",
      }),
    ).rejects.toThrow(
      "browser wait requires exactly one of ref, url, text, ms, or loadState",
    );

    // Two wait targets at once.
    await expect(
      browserTool.execute("browser-wait-ambiguous", {
        action: "wait",
        ref: "@e2",
        text: "Done",
      }),
    ).rejects.toThrow(
      "browser wait requires exactly one of ref, url, text, ms, or loadState",
    );

    // Both rejections must happen before the mock exec is ever invoked.
    expect(calls).toHaveLength(0);
  });

  it("preserves empty string wait targets instead of falling through to loadState", async () => {
    const cwd = createTempDir("coding-agent-browser-wait-empty-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { calls, operations } = createMockBrowserOperations();

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    await browserTool.execute("browser-wait-empty-text", {
      action: "wait",
      text: "",
    });

    // The empty string must still be forwarded as the --text argument.
    expect(calls[0]?.args).toEqual([
      "--profile",
      profileDir,
      "wait",
      "--text",
      "",
    ]);
  });

  it("does not create browser directories when validation fails before command construction", async () => {
    const cwd = createTempDir("coding-agent-browser-invalid-open-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { operations } = createMockBrowserOperations();

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    await expect(
      browserTool.execute("browser-open-missing-url", {
        action: "open",
      }),
    ).rejects.toThrow("browser open requires url");

    // Neither managed directory may exist on disk after the rejected call.
    expect(existsSync(profileDir)).toBe(false);
    expect(existsSync(stateDir)).toBe(false);
  });

  it("stores named state under the managed browser state directory", async () => {
    const cwd = createTempDir("coding-agent-browser-state-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    const { calls, operations } = createMockBrowserOperations();

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    const result = (await browserTool.execute("browser-state-save", {
      action: "state_save",
      stateName: "my session/prod",
    })) as ToolResultLike;

    // The space and slash in the state name are expected to become dashes,
    // yielding a single .json file directly inside stateDir.
    const expectedStatePath = join(stateDir, "my-session-prod.json");
    expect(calls[0]?.args).toEqual([
      "--profile",
      profileDir,
      "state",
      "save",
      expectedStatePath,
    ]);

    const details = result.details as BrowserToolDetails | undefined;
    expect(details?.statePath).toBe(expectedStatePath);
    expect(getTextOutput(result)).toContain(expectedStatePath);
  });

  it("treats null exit codes as browser failures", async () => {
    const cwd = createTempDir("coding-agent-browser-null-exit-");
    const profileDir = join(cwd, "profile");
    const stateDir = join(cwd, "states");
    // The mock emits "browser crashed" and reports a null exit code.
    const { operations } = createMockBrowserOperations("browser crashed", null);

    const browserTool = createBrowserTool(cwd, {
      operations,
      profileDir,
      stateDir,
    });

    await expect(
      browserTool.execute("browser-open-null-exit", {
        action: "open",
        url: "https://example.com",
      }),
    ).rejects.toThrow('browser crashed\n\nBrowser action "open" failed');
  });

  it("accepts browser in --tools and exposes it in default tool wiring", () => {
    const parsed = parseArgs(["--tools", "browser,read"]);

    expect(parsed.tools).toEqual(["browser", "read"]);
    expect(defaultCodingToolNames).toContain("browser");
    expect(createAllTools(process.cwd()).browser.name).toBe("browser");
  });

  it("mentions browser in the default system prompt", () => {
    const prompt = buildSystemPrompt();

    expect(prompt).toContain(
      "- browser: Open websites, inspect pages with snapshot, click/fill/wait, take screenshots, and save/load browser state",
    );
    expect(prompt).toContain(
      "Use browser for website tasks. Open the page, use snapshot to inspect interactive elements, then click, fill, wait, or screenshot as needed",
    );
  });
});