feat: add built-in browser tool

Add a first-class browser tool backed by agent-browser and enable it by default in coding sessions. Include CLI and system-prompt wiring plus focused coverage for the new tool.

Co-authored-by: Codex <noreply@openai.com>
2026-04-15 09:01:13 +00:00 · 2026-03-08 12:50:02 -07:00 · 2026-03-08 12:50:02 -07:00 · 0720c47495
commit 0720c47495
parent df702d95a3
8 changed files with 797 additions and 15 deletions
--- a/packages/coding-agent/test/browser-tool.test.ts
+++ b/packages/coding-agent/test/browser-tool.test.ts
@@ -0,0 +1,212 @@
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import { parseArgs } from "../src/cli/args.js";
+import { buildSystemPrompt } from "../src/core/system-prompt.js";
+import {
+	type BrowserOperations,
+	type BrowserToolDetails,
+	createAllTools,
+	createBrowserTool,
+	defaultCodingToolNames,
+} from "../src/core/tools/index.js";
+
+interface TextBlock { // Shape of a text content block; getTextOutput narrows content blocks to this type.
+	type: "text"; // Literal tag that getTextOutput filters on.
+	text: string; // The string that getTextOutput collects.
+}
+
+type ToolContentBlock = TextBlock | { type: string }; // A text block, or any other block known only by its type tag.
+
+interface ToolResultLike { // Minimal view of a tool result; the tests cast execute() results to this.
+	content: ToolContentBlock[];
+	details?: unknown; // Left untyped here; tests cast it to BrowserToolDetails before reading fields.
+}
+
+interface BrowserExecCall { // One recorded invocation of the mocked BrowserOperations.exec.
+	command: string;
+	args: string[];
+	cwd: string;
+	env: NodeJS.ProcessEnv;
+	timeout?: number; // Units are not visible in this file; one test expects the value 90 for "open".
+}
+
+function getTextOutput(result: ToolResultLike): string { // Returns the text of every "text" block in result.content, joined with newlines.
+	return result.content
+		.filter((block): block is TextBlock => block.type === "text") // Type guard: drops non-text blocks and narrows to TextBlock.
+		.map((block) => block.text)
+		.join("\n"); // An empty content array yields "".
+}
+
+function createMockBrowserOperations( // Builds a fake BrowserOperations together with the list of exec calls it records.
+	output = "", // Text delivered through options.onData on each exec; when empty, onData is never called.
+	exitCode = 0, // Exit code returned by every exec call.
+): {
+	calls: BrowserExecCall[];
+	operations: BrowserOperations;
+} {
+	const calls: BrowserExecCall[] = []; // Same array instance is returned, so callers see calls as they are recorded.
+
+	return {
+		calls,
+		operations: {
+			exec: async (command, args, options) => { // Never spawns a process; only records and replays canned data.
+				calls.push({ // Record the invocation so tests can assert on command, args, cwd, env and timeout.
+					command,
+					args,
+					cwd: options.cwd,
+					env: options.env,
+					timeout: options.timeout,
+				});
+				if (output.length > 0) {
+					options.onData(Buffer.from(output, "utf-8")); // Deliver the canned output as a single UTF-8 buffer.
+				}
+				return { exitCode };
+			},
+		},
+	};
+}
+
+describe("browser tool", () => { // Covers createBrowserTool argument building and validation, plus CLI, tool-registry and system-prompt wiring.
+	const tempDirs: string[] = []; // Temp directories created by the current test; emptied in afterEach.
+
+	afterEach(() => { // Removes every directory registered via createTempDir.
+		while (tempDirs.length > 0) {
+			const tempDir = tempDirs.pop();
+			if (tempDir) {
+				rmSync(tempDir, { recursive: true, force: true });
+			}
+		}
+	});
+
+	function createTempDir(prefix: string): string { // Creates a unique directory under the OS temp dir and registers it for cleanup.
+		const tempDir = mkdtempSync(join(tmpdir(), prefix));
+		tempDirs.push(tempDir);
+		return tempDir;
+	}
+
+	it("opens pages through agent-browser with a shared profile", async () => { // "open" should run the configured command once with the profile flag and URL.
+		const cwd = createTempDir("coding-agent-browser-open-");
+		const profileDir = join(cwd, "profile");
+		const stateDir = join(cwd, "states");
+		const { calls, operations } = createMockBrowserOperations(); // Defaults: no output, exit code 0.
+
+		const browserTool = createBrowserTool(cwd, {
+			operations,
+			command: "agent-browser-test", // Overrides the executable name so the assertion below can pin it.
+			profileDir,
+			stateDir,
+		});
+
+		const result = (await browserTool.execute("browser-open", {
+			action: "open",
+			url: "https://example.com",
+		})) as ToolResultLike;
+
+		expect(calls).toHaveLength(1);
+		expect(calls[0]).toMatchObject({ // toMatchObject checks only the listed fields, so env is not asserted here.
+			command: "agent-browser-test",
+			args: ["--profile", profileDir, "open", "https://example.com"],
+			cwd,
+			timeout: 90,
+		});
+		expect(getTextOutput(result)).toBe("Opened https://example.com"); // The mock emitted no output, so this text must come from the tool itself.
+
+		const details = result.details as BrowserToolDetails | undefined;
+		expect(details?.profilePath).toBe(profileDir);
+	});
+
+	it("uses interactive snapshots by default and returns snapshot text", async () => { // "snapshot" with no options should pass "-i" and surface the command output.
+		const cwd = createTempDir("coding-agent-browser-snapshot-");
+		const profileDir = join(cwd, "profile");
+		const stateDir = join(cwd, "states");
+		const { calls, operations } = createMockBrowserOperations("main [ref=@e1]\nbutton [ref=@e2] Sign in"); // Canned snapshot output delivered via onData.
+
+		const browserTool = createBrowserTool(cwd, {
+			operations,
+			profileDir,
+			stateDir,
+		});
+
+		const result = (await browserTool.execute("browser-snapshot", {
+			action: "snapshot",
+		})) as ToolResultLike;
+
+		expect(calls[0]?.args).toEqual(["--profile", profileDir, "snapshot", "-i"]);
+		expect(getTextOutput(result)).toContain("button [ref=@e2] Sign in");
+	});
+
+	it("validates wait targets before spawning agent-browser", async () => { // "wait" must reject zero or multiple targets without calling exec.
+		const cwd = createTempDir("coding-agent-browser-wait-");
+		const profileDir = join(cwd, "profile");
+		const stateDir = join(cwd, "states");
+		const { calls, operations } = createMockBrowserOperations();
+
+		const browserTool = createBrowserTool(cwd, {
+			operations,
+			profileDir,
+			stateDir,
+		});
+
+		await expect( // Case 1: no wait target supplied.
+			browserTool.execute("browser-wait-missing", {
+				action: "wait",
+			}),
+		).rejects.toThrow("browser wait requires exactly one of ref, url, text, ms, or loadState");
+
+		await expect( // Case 2: two wait targets supplied at once.
+			browserTool.execute("browser-wait-ambiguous", {
+				action: "wait",
+				ref: "@e2",
+				text: "Done",
+			}),
+		).rejects.toThrow("browser wait requires exactly one of ref, url, text, ms, or loadState");
+
+		expect(calls).toHaveLength(0); // Neither rejected call may have reached exec.
+	});
+
+	it("stores named state under the managed browser state directory", async () => { // "state_save" should map stateName to a .json file inside stateDir.
+		const cwd = createTempDir("coding-agent-browser-state-");
+		const profileDir = join(cwd, "profile");
+		const stateDir = join(cwd, "states");
+		const { calls, operations } = createMockBrowserOperations();
+
+		const browserTool = createBrowserTool(cwd, {
+			operations,
+			profileDir,
+			stateDir,
+		});
+
+		const result = (await browserTool.execute("browser-state-save", {
+			action: "state_save",
+			stateName: "my session/prod", // Contains a space and a slash; the expected file name below has "-" in their place.
+		})) as ToolResultLike;
+
+		const expectedStatePath = join(stateDir, "my-session-prod.json");
+		expect(calls[0]?.args).toEqual(["--profile", profileDir, "state", "save", expectedStatePath]);
+
+		const details = result.details as BrowserToolDetails | undefined;
+		expect(details?.statePath).toBe(expectedStatePath);
+		expect(getTextOutput(result)).toContain(expectedStatePath);
+	});
+
+	it("accepts browser in --tools and exposes it in default tool wiring", () => { // Checks CLI parsing, the default tool-name list and the tool registry.
+		const parsed = parseArgs(["--tools", "browser,read"]);
+		expect(parsed.tools).toEqual(["browser", "read"]); // Comma-separated value is expected to arrive as an ordered array.
+
+		expect(defaultCodingToolNames).toContain("browser");
+		expect(createAllTools(process.cwd()).browser.name).toBe("browser");
+	});
+
+	it("mentions browser in the default system prompt", () => { // Pins the exact tool-list line and usage guideline text.
+		const prompt = buildSystemPrompt(); // Called with no arguments, i.e. whatever defaults buildSystemPrompt applies.
+
+		expect(prompt).toContain(
+			"- browser: Open websites, inspect pages with snapshot, click/fill/wait, take screenshots, and save/load browser state",
+		);
+		expect(prompt).toContain(
+			"Use browser for website tasks. Open the page, use snapshot to inspect interactive elements, then click, fill, wait, or screenshot as needed",
+		);
+	});
+});