mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 06:04:43 +00:00
feat: [US-015] - Add browser console and network monitoring endpoints
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5d52010c5e
commit
47312b2a4e
5 changed files with 1014 additions and 1 deletions
493
scripts/ralph/prd.json
Normal file
493
scripts/ralph/prd.json
Normal file
|
|
@ -0,0 +1,493 @@
|
||||||
|
{
|
||||||
|
"project": "SandboxAgent",
|
||||||
|
"branchName": "ralph/browser-automation",
|
||||||
|
"description": "Browser Automation - HTTP API, CLI install, TypeScript SDK, inspector UI, and Rust modules for controlling Chromium via CDP inside sandboxes",
|
||||||
|
"userStories": [
|
||||||
|
{
|
||||||
|
"id": "US-001",
|
||||||
|
"title": "Add browser_install.rs CLI command",
|
||||||
|
"description": "As a developer, I need a CLI command to install Chromium and browser dependencies into the sandbox.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_install.rs following desktop_install.rs pattern",
|
||||||
|
"BrowserInstallRequest struct with yes, print_only, package_manager fields",
|
||||||
|
"install_browser function: platform check (Linux only), detect/validate package manager, build package list, privilege check, display + confirm, run install",
|
||||||
|
"APT packages: chromium, chromium-sandbox, libnss3, libatk-bridge2.0-0, libdrm2, libxcomposite1, libxdamage1, libxrandr2, libgbm1, libasound2, libpangocairo-1.0-0, libgtk-3-0",
|
||||||
|
"DNF packages: chromium",
|
||||||
|
"APK packages: chromium, nss",
|
||||||
|
"detect_missing_browser_dependencies() checks for chromium/chromium-browser in PATH and desktop deps",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 1,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Reuse detect_package_manager and DesktopPackageManager from desktop_install.rs"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-002",
|
||||||
|
"title": "Register install browser subcommand in CLI",
|
||||||
|
"description": "As a developer, I need the Browser variant added to InstallCommand enum in cli.rs.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"Add Browser(InstallBrowserArgs) variant to InstallCommand enum in cli.rs",
|
||||||
|
"InstallBrowserArgs struct with --yes, --print-only, --package-manager flags",
|
||||||
|
"CLI dispatches to install_browser when 'install browser' is invoked",
|
||||||
|
"Add mod browser_install to lib.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 2,
|
||||||
|
"passes": true,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-003",
|
||||||
|
"title": "Add browser type definitions (DTOs and errors)",
|
||||||
|
"description": "As a developer, I need request/response types and error types for the browser API.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_types.rs with serde + utoipa + schemars derives",
|
||||||
|
"BrowserStartRequest with width, height, dpi, url, headless, contextId, streaming fields",
|
||||||
|
"BrowserStatusResponse with state, display, resolution, startedAt, cdpUrl, url, missingDependencies, installCommand, processes, lastError",
|
||||||
|
"BrowserState enum: Inactive, InstallRequired, Starting, Active, Stopping, Failed",
|
||||||
|
"BrowserPageInfo, BrowserTabInfo, BrowserNavigateRequest, BrowserScreenshotRequest, BrowserClickRequest, BrowserTypeRequest, BrowserCrawlRequest and all other request/response types from the spec",
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_errors.rs with BrowserProblem enum: NotActive, AlreadyActive, DesktopConflict, InstallRequired, StartFailed, CdpError, Timeout, NotFound, InvalidSelector",
|
||||||
|
"All errors return application/problem+json with tag:sandboxagent.dev,2025:browser/* URIs",
|
||||||
|
"Add mod browser_types and mod browser_errors to lib.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 3,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Mirror DesktopProblem patterns for error handling"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-004",
|
||||||
|
"title": "Add CdpClient for communicating with Chromium",
|
||||||
|
"description": "As a developer, I need a persistent WebSocket client to send CDP commands to Chromium.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_cdp.rs",
|
||||||
|
"CdpClient struct with WebSocket connection and AtomicU64 for request IDs",
|
||||||
|
"connect() method: connects to ws://127.0.0.1:9222/devtools/browser/{id} (discovered via http://127.0.0.1:9222/json/version)",
|
||||||
|
"send(method, params) method: sends CDP command and waits for matching response by ID",
|
||||||
|
"subscribe(event, callback) method for subscribing to CDP events like Runtime.consoleAPICalled and Network.requestWillBeSent",
|
||||||
|
"Add mod browser_cdp to lib.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 4,
|
||||||
|
"passes": true,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-005",
|
||||||
|
"title": "Add BrowserRuntime state machine",
|
||||||
|
"description": "As a developer, I need the core runtime that manages Xvfb + Chromium + Neko lifecycle.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_runtime.rs",
|
||||||
|
"BrowserRuntime struct with config, process_runtime, desktop_streaming_manager, desktop_recording_manager, cdp_client, inner state",
|
||||||
|
"BrowserRuntimeState with state, xvfb_process_id, chromium_process_id, display, resolution, started_at, last_error, console_messages (VecDeque max 1000), network_requests (VecDeque max 1000)",
|
||||||
|
"start() method: check deps, start Xvfb (reuse start_xvfb_locked), start Chromium with correct flags (--no-sandbox, --remote-debugging-port=9222, etc.), poll CDP /json/version until ready (15s timeout), optionally start Neko",
|
||||||
|
"stop() method: kill Chromium, stop Neko, stop Xvfb",
|
||||||
|
"status() method: return current BrowserStatusResponse",
|
||||||
|
"Check DesktopRuntime is not active before starting (mutual exclusivity), return 409 if conflict",
|
||||||
|
"Add BrowserRuntime to app state in state.rs",
|
||||||
|
"Add mod browser_runtime to lib.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 5,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Implemented as separate BrowserRuntime (not a mode of DesktopRuntime) for cleaner separation of concerns. Shares Xvfb start logic and DesktopStreamingManager. Mutual exclusivity checked via DesktopRuntime.status()."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-006",
|
||||||
|
"title": "Add browser lifecycle HTTP endpoints (start/stop/status)",
|
||||||
|
"description": "As a user, I need HTTP endpoints to start, stop, and check the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"POST /v1/browser/start endpoint: accepts BrowserStartRequest, returns BrowserStatusResponse",
|
||||||
|
"POST /v1/browser/stop endpoint: stops browser, returns { state: 'inactive' }",
|
||||||
|
"GET /v1/browser/status endpoint: returns BrowserStatusResponse",
|
||||||
|
"Routes registered in router.rs following existing patterns",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 6,
|
||||||
|
"passes": true,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-007",
|
||||||
|
"title": "Add CDP WebSocket proxy endpoint",
|
||||||
|
"description": "As a user, I need to connect Playwright/Puppeteer through the sandbox agent to Chromium's CDP.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/cdp endpoint: WebSocket upgrade",
|
||||||
|
"Bidirectional WebSocket relay proxying to ws://127.0.0.1:9222/devtools/browser/{id}",
|
||||||
|
"Follow same pattern as Neko signaling proxy in router.rs",
|
||||||
|
"External Playwright can connect via ws://sandbox-host:2468/v1/browser/cdp",
|
||||||
|
"Returns BrowserProblem::NotActive if browser not running",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 7,
|
||||||
|
"passes": true,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-008",
|
||||||
|
"title": "Add browser navigation endpoints",
|
||||||
|
"description": "As a user, I need HTTP endpoints for navigating the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"POST /v1/browser/navigate: accepts { url, waitUntil? }, returns { url, title, status } via CDP Page.navigate + Page.lifecycleEvent",
|
||||||
|
"POST /v1/browser/back: returns { url, title } via CDP Page.getNavigationHistory + Page.navigateToHistoryEntry (history entry at delta -1)",
|
||||||
|
"POST /v1/browser/forward: returns { url, title } via CDP Page.getNavigationHistory + Page.navigateToHistoryEntry (history entry at delta 1)",
|
||||||
|
"POST /v1/browser/reload: accepts { ignoreCache? }, returns { url, title }",
|
||||||
|
"POST /v1/browser/wait: accepts { selector?, timeout?, state? }, returns { found } using Runtime.evaluate with MutationObserver or DOM.querySelector polling",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 8,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Used get_cdp() (Arc<CdpClient>) pattern instead of with_cdp() closure to avoid async lifetime issues"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-009",
|
||||||
|
"title": "Add browser tab management endpoints",
|
||||||
|
"description": "As a user, I need to list, create, activate, and close browser tabs.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/tabs: returns { tabs: [{ id, url, title, active }] } via Target.getTargets filtered to type page",
|
||||||
|
"POST /v1/browser/tabs: accepts { url? }, returns { id, url, title } (201) via Target.createTarget",
|
||||||
|
"POST /v1/browser/tabs/:tab_id/activate: returns { id, url, title } via Target.activateTarget",
|
||||||
|
"DELETE /v1/browser/tabs/:tab_id: returns { ok: true } via Target.closeTarget",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 9,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Used Target.getTargets for listing, Target.createTarget/activateTarget/closeTarget for CRUD. Active tab detection uses Page.getNavigationHistory URL matching."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-010",
|
||||||
|
"title": "Add browser screenshot and PDF endpoints",
|
||||||
|
"description": "As a user, I need to capture screenshots and PDFs of browser pages.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/screenshot: query params format (png/jpeg/webp), quality, fullPage, selector; returns image binary with correct Content-Type via Page.captureScreenshot",
|
||||||
|
"GET /v1/browser/pdf: query params format (a4/letter/legal), landscape, printBackground, scale; returns application/pdf via Page.printToPDF",
|
||||||
|
"Both routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 10,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Browser screenshot is distinct from desktop screenshot - this captures the viewport/page via CDP, not the Xvfb display. Uses Page.captureScreenshot and Page.printToPDF CDP commands with base64 decoding."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-011",
|
||||||
|
"title": "Add browser content extraction endpoints (HTML, markdown, links, snapshot)",
|
||||||
|
"description": "As a user, I need to extract page content in various formats.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/content: query param selector?; returns { html, url, title } via Runtime.evaluate document.documentElement.outerHTML",
|
||||||
|
"GET /v1/browser/markdown: returns { markdown, url, title }; extract DOM via CDP, convert with html2md crate, strip nav/footer/aside",
|
||||||
|
"GET /v1/browser/links: returns { links: [{ href, text }], url }",
|
||||||
|
"GET /v1/browser/snapshot: returns { snapshot, url, title } via Accessibility.getFullAXTree",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 11,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Used html2md crate for HTML-to-Markdown conversion. Accessibility.getFullAXTree for snapshot. Runtime.evaluate for content/links extraction."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-012",
|
||||||
|
"title": "Add browser scrape and execute endpoints",
|
||||||
|
"description": "As a user, I need to scrape structured data and execute JavaScript in the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"POST /v1/browser/scrape: accepts { selectors: Record<string,string>, url? }; returns { data: Record<string,string[]>, url, title } via Runtime.evaluate with querySelectorAll + textContent",
|
||||||
|
"POST /v1/browser/execute: accepts { expression, awaitPromise? }; returns { result, type } via Runtime.evaluate",
|
||||||
|
"Both routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 12,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "Uses Runtime.evaluate with querySelectorAll for scrape, Runtime.evaluate with returnByValue for execute. Scrape optionally navigates first if url provided. Execute checks for exceptionDetails and returns CDP error."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-013",
|
||||||
|
"title": "Add browser interaction endpoints (click, type, select, hover, scroll)",
|
||||||
|
"description": "As a user, I need to interact with page elements via CSS selectors.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"POST /v1/browser/click: accepts { selector, button?, clickCount?, timeout? }; uses DOM.querySelector + DOM.getBoxModel + Input.dispatchMouseEvent",
|
||||||
|
"POST /v1/browser/type: accepts { selector, text, delay?, clear? }; uses DOM.focus + Input.dispatchKeyEvent",
|
||||||
|
"POST /v1/browser/select: accepts { selector, value }",
|
||||||
|
"POST /v1/browser/hover: accepts { selector }",
|
||||||
|
"POST /v1/browser/scroll: accepts { selector?, x?, y? }",
|
||||||
|
"All return { ok: true } on success",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 13,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "These are browser-level (CDP) interactions, distinct from desktop xdotool input. Click/hover use DOM.querySelector + DOM.getBoxModel + Input.dispatchMouseEvent. Type uses DOM.focus + Input.dispatchKeyEvent. Select/scroll use Runtime.evaluate."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-014",
|
||||||
|
"title": "Add browser upload and dialog endpoints",
|
||||||
|
"description": "As a user, I need to upload files and handle JavaScript dialogs.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"POST /v1/browser/upload: accepts { selector, path }; uses DOM.setFileInputFiles; returns { ok: true }",
|
||||||
|
"POST /v1/browser/dialog: accepts { accept, text? }; uses Page.handleJavaScriptDialog; returns { ok: true }",
|
||||||
|
"Both routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 14,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "DOM.setFileInputFiles for upload (requires DOM.querySelector to find node), Page.handleJavaScriptDialog for dialog with optional promptText parameter"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-015",
|
||||||
|
"title": "Add browser console and network monitoring endpoints",
|
||||||
|
"description": "As a user, I need to see console logs and network requests from the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/console: query params level?, limit? (default 100); returns { messages: [{ level, text, url?, line?, timestamp }] }",
|
||||||
|
"GET /v1/browser/network: query params limit?, urlPattern?; returns { requests: [{ url, method, status, mimeType, responseSize, duration, timestamp }] }",
|
||||||
|
"BrowserRuntime subscribes to Runtime.consoleAPICalled and Network.requestWillBeSent + Network.responseReceived events via CdpClient",
|
||||||
|
"Messages/requests buffered in bounded ring buffers (max 1000 each) in BrowserRuntimeState",
|
||||||
|
"Both routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 15,
|
||||||
|
"passes": true,
|
||||||
|
"notes": "CDP events (Runtime.consoleAPICalled, Network.requestWillBeSent, Network.responseReceived) subscribed in start(). Network requests correlated via internal request_id field. Background tokio tasks populate ring buffers."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-016",
|
||||||
|
"title": "Add browser context (persistent profile) management",
|
||||||
|
"description": "As a user, I need to create and manage persistent browser profiles for cookies/storage.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_context.rs",
|
||||||
|
"GET /v1/browser/contexts: returns { contexts: [{ id, name, createdAt, sizeBytes }] }",
|
||||||
|
"POST /v1/browser/contexts: accepts { name }; creates user-data-dir at $STATE_DIR/browser-contexts/{id}/; returns { id, name, createdAt } (201)",
|
||||||
|
"DELETE /v1/browser/contexts/:context_id: deletes context directory",
|
||||||
|
"POST /v1/browser/start accepts contextId to set --user-data-dir to context's directory",
|
||||||
|
"Add mod browser_context to lib.rs",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 16,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-017",
|
||||||
|
"title": "Add browser cookie management endpoints",
|
||||||
|
"description": "As a user, I need to get, set, and clear cookies in the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"GET /v1/browser/cookies: query param url?; returns { cookies: [{ name, value, domain, path, expires, httpOnly, secure, sameSite }] } via Network.getCookies",
|
||||||
|
"POST /v1/browser/cookies: accepts { cookies: [...] }; uses Network.setCookies; returns { ok: true }",
|
||||||
|
"DELETE /v1/browser/cookies: query params name?, domain?; clears matching cookies",
|
||||||
|
"All routes registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 17,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-018",
|
||||||
|
"title": "Add browser crawl endpoint",
|
||||||
|
"description": "As a user, I need to crawl multiple pages starting from a URL.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/src/browser_crawl.rs",
|
||||||
|
"POST /v1/browser/crawl: accepts { url, maxPages? (default 10, max 100), maxDepth? (default 2), allowedDomains?, extract? (markdown|html|text|links) }",
|
||||||
|
"Returns { pages: [{ url, title, content, links, status, depth }], totalPages, truncated }",
|
||||||
|
"BFS crawl implementation: navigate, wait for load, extract content, collect links, filter by domain/depth",
|
||||||
|
"Add mod browser_crawl to lib.rs",
|
||||||
|
"Route registered in router.rs",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 18,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-019",
|
||||||
|
"title": "Add TypeScript SDK browser types",
|
||||||
|
"description": "As a developer, I need TypeScript type definitions for all browser API requests and responses.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file sdks/typescript/src/types/browser.ts with all interfaces: BrowserStartRequest, BrowserStatusResponse, BrowserTabInfo, BrowserPageInfo, BrowserNavigateRequest, BrowserScreenshotRequest, BrowserClickRequest, BrowserTypeRequest, BrowserCrawlRequest, BrowserCrawlResponse, BrowserContextInfo, BrowserCookie, and all other request/response types",
|
||||||
|
"Types exported from sdks/typescript/src/types/index.ts (or equivalent barrel)",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 19,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-020",
|
||||||
|
"title": "Add TypeScript SDK browser lifecycle and CDP methods",
|
||||||
|
"description": "As a developer, I need SDK methods for browser lifecycle and CDP access.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"startBrowser(request?) method on SandboxAgent class calling POST /v1/browser/start",
|
||||||
|
"stopBrowser() method calling POST /v1/browser/stop",
|
||||||
|
"getBrowserStatus() method calling GET /v1/browser/status",
|
||||||
|
"getBrowserCdpUrl() method returning ws://host:port/v1/browser/cdp",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 20,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-021",
|
||||||
|
"title": "Add TypeScript SDK browser navigation and tab methods",
|
||||||
|
"description": "As a developer, I need SDK methods for navigating and managing tabs.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"browserNavigate(request) method calling POST /v1/browser/navigate",
|
||||||
|
"browserBack() method calling POST /v1/browser/back",
|
||||||
|
"browserForward() method calling POST /v1/browser/forward",
|
||||||
|
"browserReload(request?) method calling POST /v1/browser/reload",
|
||||||
|
"browserWait(request) method calling POST /v1/browser/wait",
|
||||||
|
"getBrowserTabs() method calling GET /v1/browser/tabs",
|
||||||
|
"createBrowserTab(request?) method calling POST /v1/browser/tabs",
|
||||||
|
"activateBrowserTab(tabId) method calling POST /v1/browser/tabs/:id/activate",
|
||||||
|
"closeBrowserTab(tabId) method calling DELETE /v1/browser/tabs/:id",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 21,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-022",
|
||||||
|
"title": "Add TypeScript SDK browser content extraction methods",
|
||||||
|
"description": "As a developer, I need SDK methods for extracting page content.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"takeBrowserScreenshot(request?) returning Promise<Uint8Array> calling GET /v1/browser/screenshot",
|
||||||
|
"getBrowserPdf(request?) returning Promise<Uint8Array> calling GET /v1/browser/pdf",
|
||||||
|
"getBrowserContent(request?) calling GET /v1/browser/content",
|
||||||
|
"getBrowserMarkdown() calling GET /v1/browser/markdown",
|
||||||
|
"scrapeBrowser(request) calling POST /v1/browser/scrape",
|
||||||
|
"getBrowserLinks() calling GET /v1/browser/links",
|
||||||
|
"executeBrowserScript(request) calling POST /v1/browser/execute",
|
||||||
|
"getBrowserSnapshot() calling GET /v1/browser/snapshot",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 22,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-023",
|
||||||
|
"title": "Add TypeScript SDK browser interaction methods",
|
||||||
|
"description": "As a developer, I need SDK methods for interacting with page elements.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"browserClick(request) calling POST /v1/browser/click",
|
||||||
|
"browserType(request) calling POST /v1/browser/type",
|
||||||
|
"browserSelect(request) calling POST /v1/browser/select",
|
||||||
|
"browserHover(request) calling POST /v1/browser/hover",
|
||||||
|
"browserScroll(request) calling POST /v1/browser/scroll",
|
||||||
|
"browserUpload(request) calling POST /v1/browser/upload",
|
||||||
|
"browserDialog(request) calling POST /v1/browser/dialog",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 23,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-024",
|
||||||
|
"title": "Add TypeScript SDK browser monitoring, crawl, context, and cookie methods",
|
||||||
|
"description": "As a developer, I need SDK methods for monitoring, crawling, contexts, and cookies.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"getBrowserConsole(request?) calling GET /v1/browser/console",
|
||||||
|
"getBrowserNetwork(request?) calling GET /v1/browser/network",
|
||||||
|
"crawlBrowser(request) calling POST /v1/browser/crawl",
|
||||||
|
"getBrowserContexts() calling GET /v1/browser/contexts",
|
||||||
|
"createBrowserContext(request) calling POST /v1/browser/contexts",
|
||||||
|
"deleteBrowserContext(contextId) calling DELETE /v1/browser/contexts/:id",
|
||||||
|
"getBrowserCookies(request?) calling GET /v1/browser/cookies",
|
||||||
|
"setBrowserCookies(request) calling POST /v1/browser/cookies",
|
||||||
|
"deleteBrowserCookies(request?) calling DELETE /v1/browser/cookies",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 24,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-025",
|
||||||
|
"title": "Add BrowserViewer React component",
|
||||||
|
"description": "As a developer, I need a reusable React component for embedding the browser stream with navigation.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file sdks/react/src/BrowserViewer.tsx",
|
||||||
|
"BrowserViewerProps interface with client, className, style, height, showNavigationBar (default true), showStatusBar (default true), onNavigate, onConnect, onDisconnect, onError",
|
||||||
|
"BrowserViewerClient type using Pick from SandboxAgent: connectDesktopStream, browserNavigate, browserBack, browserForward, browserReload, getBrowserStatus",
|
||||||
|
"Wraps DesktopViewer with navigation bar (back, forward, reload buttons + URL input)",
|
||||||
|
"Exported from sdks/react/src/index.ts",
|
||||||
|
"Typecheck passes"
|
||||||
|
],
|
||||||
|
"priority": 25,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-026",
|
||||||
|
"title": "Add Browser tab to Inspector UI - runtime control and live view sections",
|
||||||
|
"description": "As a user, I need a Browser tab in the inspector for controlling and viewing the browser.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file frontend/packages/inspector/src/components/debug/BrowserTab.tsx",
|
||||||
|
"Tab registered in DebugPanel.tsx with Globe icon from lucide-react, added after desktop tab",
|
||||||
|
"DebugTab type updated to include 'browser'",
|
||||||
|
"Section 1 - Runtime Control: state pill, status grid (URL, Resolution, Started), config inputs (Width, Height, URL, Context dropdown), Start/Stop buttons, auto-refresh every 5s when active",
|
||||||
|
"Section 2 - Live View: navigation bar (Back, Forward, Reload + URL input), DesktopViewer component for WebRTC stream, current URL display",
|
||||||
|
"Typecheck passes",
|
||||||
|
"Verify in browser using dev-browser skill"
|
||||||
|
],
|
||||||
|
"priority": 26,
|
||||||
|
"passes": false,
|
||||||
|
"notes": "Follow DesktopTab.tsx patterns for card layout and state management"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-027",
|
||||||
|
"title": "Add Browser tab - screenshot, tabs, and console sections",
|
||||||
|
"description": "As a user, I need screenshot capture, tab management, and console viewing in the inspector Browser tab.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"Section 3 - Screenshot: format selector (PNG/JPEG/WebP), quality input, fullPage checkbox, selector input, screenshot button + preview image",
|
||||||
|
"Section 4 - Tabs: list of open tabs with URL and title, active tab highlighted, per-tab Activate/Close buttons, New Tab button with URL input",
|
||||||
|
"Section 5 - Console: level filter pills (All/Log/Warn/Error/Info), scrollable message list with level-colored indicators, auto-refresh every 3s, Clear button",
|
||||||
|
"Typecheck passes",
|
||||||
|
"Verify in browser using dev-browser skill"
|
||||||
|
],
|
||||||
|
"priority": 27,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-028",
|
||||||
|
"title": "Add Browser tab - network, content tools, recording, contexts, diagnostics sections",
|
||||||
|
"description": "As a user, I need network monitoring, content extraction, recording, context management, and diagnostics in the inspector.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"Section 6 - Network: request list (method, URL, status, size, duration), URL pattern filter, auto-refresh every 3s",
|
||||||
|
"Section 7 - Content Tools: Get HTML, Get Markdown, Get Links, Get Snapshot buttons with output textarea",
|
||||||
|
"Section 8 - Recording: reuse desktop recording UI pattern (start/stop, FPS input, recording list with download/delete)",
|
||||||
|
"Section 9 - Contexts: list contexts with name/date/size, create form, delete button, Use button to set contextId",
|
||||||
|
"Section 10 - Diagnostics: last error details, process list (Xvfb, Chromium, Neko) with PIDs and running state",
|
||||||
|
"Typecheck passes",
|
||||||
|
"Verify in browser using dev-browser skill"
|
||||||
|
],
|
||||||
|
"priority": 28,
|
||||||
|
"passes": false,
|
||||||
|
"notes": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "US-029",
|
||||||
|
"title": "Add browser API integration tests",
|
||||||
|
"description": "As a developer, I need integration tests for the browser HTTP API.",
|
||||||
|
"acceptanceCriteria": [
|
||||||
|
"New file server/packages/sandbox-agent/tests/browser_api.rs",
|
||||||
|
"Test lifecycle: start, status shows active, stop, status shows inactive",
|
||||||
|
"Test navigation: navigate to test page, back, forward, reload",
|
||||||
|
"Test tabs: create tab, list shows 2 tabs, activate second, close first",
|
||||||
|
"Test screenshots: capture PNG, JPEG, WebP; verify non-empty binary response",
|
||||||
|
"Test content extraction: HTML contains expected elements, markdown is non-empty, links extracted",
|
||||||
|
"Test interaction: click button, type in input, verify page state changed",
|
||||||
|
"Test contexts: create context, list shows it, delete context",
|
||||||
|
"Tests use static test HTML pages served from within the sandbox (no network deps)",
|
||||||
|
"Update docker/test-agent/Dockerfile to include Chromium if not already present",
|
||||||
|
"Tests pass"
|
||||||
|
],
|
||||||
|
"priority": 29,
|
||||||
|
"passes": false,
|
||||||
|
"notes": "Run with: cargo test -p sandbox-agent --test browser_api"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
290
scripts/ralph/progress.txt
Normal file
290
scripts/ralph/progress.txt
Normal file
|
|
@ -0,0 +1,290 @@
|
||||||
|
## Codebase Patterns
|
||||||
|
- `desktop_install.rs` contains shared install helpers (detect_package_manager, find_binary, running_as_root, prompt_yes_no, render_install_command, run_install_commands) now pub(crate) for reuse by browser_install.rs
|
||||||
|
- `DesktopPackageManager` enum (Apt/Dnf/Apk) is the canonical package manager type, reused across install modules
|
||||||
|
- New modules must be registered in `lib.rs` with `mod module_name;`
|
||||||
|
- Unit tests go inside the module file under `#[cfg(test)] mod tests`
|
||||||
|
- Lefthook pre-commit hook runs rustfmt automatically; code may be reformatted on commit
|
||||||
|
- CLI install subcommand pattern: enum variant in `InstallCommand`, `#[derive(Args)]` struct, local wrapper fn `install_X_local`, match arm in `run_install`
|
||||||
|
- DTO pattern: `#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]` + `#[serde(rename_all = "camelCase")]`; use `IntoParams` for query param structs, `Default` for optional request bodies
|
||||||
|
- Error pattern: struct with constructor methods (not enum), `to_problem_details()` converts to ProblemDetails for HTTP response
|
||||||
|
- Browser types reuse `DesktopResolution`, `DesktopProcessInfo`, `DesktopErrorInfo` from `desktop_types.rs`
|
||||||
|
- CDP client pattern: `tokio_tungstenite::connect_async` for WS, `futures::SplitSink/SplitStream` for split, `tokio::sync::Mutex` for shared WS sender, `oneshot` channels for request/response, `mpsc::unbounded_channel` for event subscriptions
|
||||||
|
- WebSocket text messages use `Utf8Bytes` in tungstenite 0.24; use `.into()` for String->Utf8Bytes and `.to_string()` for Utf8Bytes->String
|
||||||
|
- No new crate dependencies needed for WebSocket CDP client; `tokio-tungstenite`, `reqwest`, `futures` already in Cargo.toml
|
||||||
|
- BrowserRuntime pattern: separate struct from DesktopRuntime, shares Xvfb start logic and DesktopStreamingManager; mutual exclusivity checked via `desktop_runtime.status().await`
|
||||||
|
- AppState in `router.rs` (not state.rs): add field, create in `with_branding()`, add accessor method
|
||||||
|
- `ProcessOwner::Desktop` is reused for browser processes (there's no `ProcessOwner::Browser` variant)
|
||||||
|
- Browser uses display :98 by default (desktop uses :99) to avoid conflicts
|
||||||
|
- `with_cdp()` async closure pattern for safe CDP access through Mutex-guarded state
|
||||||
|
- New error types need `impl From<ErrorType> for ApiError` in router.rs before handlers can use `?` on them
|
||||||
|
- Browser routes go between desktop stream routes and `/agents` routes in v1_router
|
||||||
|
- WebSocket proxy pattern: handler validates precondition with `ensure_active()`, calls `ws.on_upgrade()` with session fn; session fn discovers upstream WS URL, connects, runs bidirectional `tokio::select!` relay loop
|
||||||
|
- `BrowserRuntime::ensure_active()` is a reusable guard for any handler requiring active browser state
|
||||||
|
- `BrowserRuntime::get_cdp()` returns `Arc<CdpClient>` without holding state lock; preferred over `with_cdp()` closure for handlers that do multiple async CDP calls (avoids lifetime issues)
|
||||||
|
- `CdpClient::close()` takes `&self` (not `self`); CdpClient is stored as `Option<Arc<CdpClient>>` in BrowserRuntimeStateData
|
||||||
|
- `get_page_info_via_cdp()` is a helper fn in router.rs for getting current URL and title via Runtime.evaluate
|
||||||
|
- CDP `Page.getNavigationHistory` returns `{currentIndex, entries: [{id, url, title}]}` for back/forward navigation
|
||||||
|
- CDP `Page.navigateToHistoryEntry` takes `{entryId}` (the id from history entries, not the index)
|
||||||
|
- CDP `Target.getTargets` returns `{targetInfos: [{targetId, url, title, type, ...}]}`; filter `type == "page"` for browser tabs
|
||||||
|
- CDP `Target.createTarget` takes `{url}`, returns `{targetId}`; `Target.closeTarget` takes `{targetId}`, returns `{success: bool}`
|
||||||
|
- For 201 responses, handler returns `(StatusCode, Json<T>)` tuple; axum handles the tuple as status + body
|
||||||
|
- CDP screenshot/PDF commands return base64-encoded data in `{data: "..."}` field; decode with `base64::engine::general_purpose::STANDARD`
|
||||||
|
- Binary response pattern: `Result<Response, ApiError>` with `([(header::CONTENT_TYPE, "image/png")], Bytes::from(bytes)).into_response()`
|
||||||
|
- `html2md::parse_html()` for HTML-to-Markdown conversion; crate added as `html2md = "0.2"` in Cargo.toml
|
||||||
|
- CDP `Accessibility.getFullAXTree` returns `{nodes: [{role: {value: "..."}, name: {value: "..."}, ...}]}`; filter out "none" and "GenericContainer" roles for readable output
|
||||||
|
- For DOM extraction via CDP, clone body first (`document.body.cloneNode(true)`) to avoid mutating live page when stripping elements
|
||||||
|
- For multi-selector scraping, serialize selector map to JSON, embed in a single Runtime.evaluate JS expression, return JSON string (avoids multiple CDP round trips)
|
||||||
|
- Runtime.evaluate `exceptionDetails` field indicates JS errors; check it before reading `result` in execute endpoints
|
||||||
|
- CDP element interaction pattern: DOM.getDocument → DOM.querySelector → DOM.getBoxModel (for coordinates) → Input.dispatchMouseEvent; content array is [x1,y1,x2,y2,x3,y3,x4,y4]
|
||||||
|
- For simple DOM manipulation (select value, scroll), Runtime.evaluate with inline JS is simpler than CDP DOM commands
|
||||||
|
- CDP `DOM.setFileInputFiles` takes `{files: [path], nodeId}` for file upload; requires DOM.querySelector to find the input node first
|
||||||
|
- CDP `Page.handleJavaScriptDialog` takes `{accept, promptText?}` for alert/confirm/prompt handling; no DOM setup needed
|
||||||
|
- CDP event monitoring pattern: `Runtime.enable` + `Network.enable` in start(), subscribe via `cdp.subscribe(event)`, spawn tokio tasks to populate ring buffers; tasks auto-terminate when CDP connection closes
|
||||||
|
- For internal-only fields in API types, use `#[serde(default, skip_serializing)]` to keep them out of JSON responses
|
||||||
|
|
||||||
|
# Ralph Progress Log
|
||||||
|
Started: Tue Mar 17 04:32:06 AM PDT 2026
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-001
|
||||||
|
- Implemented `browser_install.rs` with BrowserInstallRequest, install_browser(), browser_packages(), detect_missing_browser_dependencies(), browser_platform_support_message()
|
||||||
|
- Made shared helpers in `desktop_install.rs` pub(crate): detect_package_manager, find_binary, running_as_root, prompt_yes_no, render_install_command, run_install_commands
|
||||||
|
- APT packages: chromium, chromium-sandbox, libnss3, libatk-bridge2.0-0, libdrm2, libxcomposite1, libxdamage1, libxrandr2, libgbm1, libasound2, libpangocairo-1.0-0, libgtk-3-0
|
||||||
|
- DNF packages: chromium
|
||||||
|
- APK packages: chromium, nss
|
||||||
|
- Files changed: browser_install.rs (new), desktop_install.rs (pub(crate) visibility), lib.rs (mod registration)
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- Helper functions in desktop_install.rs were all private; had to make them pub(crate) for cross-module reuse
|
||||||
|
- find_binary is also duplicated in desktop_runtime.rs (its own local copy); consider consolidating in the future
|
||||||
|
- cargo test --lib is needed to run unit tests inside private modules
|
||||||
|
- Pre-existing dead_code warnings are normal in this codebase; don't be alarmed by them
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-002
|
||||||
|
- Added `Browser(InstallBrowserArgs)` variant to `InstallCommand` enum in cli.rs
|
||||||
|
- Added `InstallBrowserArgs` struct with `--yes`, `--print-only`, `--package-manager` flags
|
||||||
|
- Added `install_browser_local` dispatch function mirroring `install_desktop_local`
|
||||||
|
- Imported `install_browser` and `BrowserInstallRequest` from `browser_install` module
|
||||||
|
- Files changed: cli.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- CLI dispatch pattern: enum variant in `InstallCommand`, args struct with `#[derive(Args, Debug)]`, local wrapper fn, match arm in `run_install`
|
||||||
|
- `DesktopPackageManager` is reused for browser args too (same `value_enum` derive)
|
||||||
|
- `mod browser_install` was already in lib.rs from US-001
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-003
|
||||||
|
- Created `browser_types.rs` with all browser API DTOs: BrowserState, BrowserStartRequest, BrowserStatusResponse, BrowserNavigateRequest, BrowserPageInfo, BrowserReloadRequest, BrowserWaitRequest, BrowserTabInfo, BrowserTabListResponse, BrowserCreateTabRequest, BrowserScreenshotQuery, BrowserPdfQuery, BrowserContentQuery/Response, BrowserMarkdownResponse, BrowserLinkInfo, BrowserLinksResponse, BrowserSnapshotResponse, BrowserScrapeRequest/Response, BrowserExecuteRequest/Response, BrowserClickRequest, BrowserTypeRequest, BrowserSelectRequest, BrowserHoverRequest, BrowserScrollRequest, BrowserUploadRequest, BrowserDialogRequest, BrowserActionResponse, BrowserConsoleQuery/Message/Response, BrowserNetworkQuery/Request/Response, BrowserCrawlRequest/Page/Response, BrowserContextInfo/ListResponse/CreateRequest, BrowserCookie, BrowserCookiesQuery/Response, BrowserSetCookiesRequest, BrowserDeleteCookiesQuery
|
||||||
|
- Created `browser_errors.rs` with BrowserProblem struct: not_active (409), already_active (409), desktop_conflict (409), install_required (424), start_failed (500), cdp_error (502), timeout (504), not_found (404), invalid_selector (400)
|
||||||
|
- Error URIs use `tag:sandboxagent.dev,2025:browser/*` format per spec
|
||||||
|
- Registered `mod browser_errors` and `pub mod browser_types` in lib.rs
|
||||||
|
- 4 unit tests for BrowserProblem pass
|
||||||
|
- Files changed: browser_types.rs (new), browser_errors.rs (new), lib.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- browser_types.rs reuses DesktopResolution, DesktopProcessInfo, DesktopErrorInfo from desktop_types.rs (no duplication)
|
||||||
|
- BrowserProblem follows the same struct+constructor pattern as DesktopProblem, not an enum
|
||||||
|
- Error type URIs differ: DesktopProblem uses `urn:sandbox-agent:error:{code}`, BrowserProblem uses `tag:sandboxagent.dev,2025:browser/{code}` per the spec
|
||||||
|
- `pub mod browser_types` makes types available for re-export (like desktop_types), while `mod browser_errors` is private (internal only)
|
||||||
|
- HashMap requires `use std::collections::HashMap` and serde_json::Value for dynamic types
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-004
|
||||||
|
- Created `browser_cdp.rs` with `CdpClient` struct for Chrome DevTools Protocol communication
|
||||||
|
- `CdpClient` fields: `ws_sender` (Arc<Mutex<SplitSink>>), `next_id` (AtomicU64), `pending` (HashMap<u64, oneshot::Sender>), `subscribers` (HashMap<String, Vec<UnboundedSender>>), `reader_task` (JoinHandle)
|
||||||
|
- `connect()`: discovers WS URL via `http://127.0.0.1:9222/json/version`, connects WebSocket, spawns background reader task
|
||||||
|
- `send(method, params)`: assigns incrementing ID, sends JSON-RPC style CDP command, waits for matching response with 30s timeout
|
||||||
|
- `subscribe(event)`: returns `mpsc::UnboundedReceiver<Value>` that receives event params; subscriptions auto-clean on receiver drop
|
||||||
|
- `reader_loop`: background task routes responses to pending requests by ID, broadcasts events to subscribers, fails all pending on connection close
|
||||||
|
- `close()`: aborts reader task and closes WebSocket; `Drop` impl also aborts reader task
|
||||||
|
- Registered `mod browser_cdp` in lib.rs
|
||||||
|
- Files changed: browser_cdp.rs (new), lib.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- tokio-tungstenite 0.24 uses `Utf8Bytes` for `Message::Text`, not `String`; use `.into()` to convert String->Utf8Bytes when sending, `.to_string()` when parsing received text
|
||||||
|
- CDP responses have `id` field (matched to pending requests), events have `method` but no `id` (routed to subscribers)
|
||||||
|
- CDP errors in responses are `{"id": N, "error": {"code": -32000, "message": "..."}}` - extract `error.message` string
|
||||||
|
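- `tokio::sync::Mutex` is needed for the WS sender because the lock is held across the `.await` on send; a `std::sync::Mutex` guard is not `Send` and would block the executor thread (risking deadlock) if held across an await point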
|
||||||
|
- `reqwest` already available for the HTTP discovery call to `/json/version`
|
||||||
|
- The `subscribe` method returns a channel receiver (Rust-idiomatic) rather than taking a callback
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-005
|
||||||
|
- Created `browser_runtime.rs` with `BrowserRuntime` struct for managing Xvfb + Chromium + Neko lifecycle
|
||||||
|
- `BrowserRuntime` fields: config (BrowserRuntimeConfig), process_runtime, desktop_runtime, streaming_manager (DesktopStreamingManager), inner (Arc<Mutex<BrowserRuntimeStateData>>)
|
||||||
|
- `BrowserRuntimeStateData` with state, display, resolution, started_at, last_error, xvfb/chromium (ManagedBrowserProcess), cdp_client (CdpClient), context_id, console_messages (VecDeque max 1000), network_requests (VecDeque max 1000)
|
||||||
|
- `start()`: checks desktop mutual exclusivity, validates platform/deps, starts Xvfb (non-headless), starts Chromium with correct flags (--no-sandbox, --remote-debugging-port=9222, etc.), polls CDP /json/version (15s timeout), connects CdpClient, optionally starts Neko streaming
|
||||||
|
- `stop()`: closes CDP client, stops streaming, stops Chromium, stops Xvfb, resets state
|
||||||
|
- `status()`: refreshes process health, returns BrowserStatusResponse with cdp_url, processes, etc.
|
||||||
|
- `with_cdp()`: async closure pattern for safe CDP access through Mutex-guarded state
|
||||||
|
- Ring buffer methods: push_console_message, push_network_request, console_messages, network_requests
|
||||||
|
- Added BrowserRuntime to AppState in router.rs with accessor method
|
||||||
|
- Registered `mod browser_runtime` in lib.rs
|
||||||
|
- 4 unit tests pass
|
||||||
|
- Files changed: browser_runtime.rs (new), router.rs, lib.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- AppState is defined in router.rs, not in a separate state.rs file
|
||||||
|
- DesktopRuntime and BrowserRuntime both use ProcessOwner::Desktop (no separate Browser variant exists)
|
||||||
|
- Browser default display is :98 to avoid conflict with desktop's :99
|
||||||
|
- Cannot return a reference to CdpClient from Mutex-guarded state; use `with_cdp()` closure pattern instead
|
||||||
|
- `DesktopStreamingManager` is reusable for browser streaming since it just wraps neko on an X display
|
||||||
|
- Chromium binary can be `chromium`, `chromium-browser`, `google-chrome`, or `google-chrome-stable`
|
||||||
|
- Headless mode uses `--headless=new` (new Chrome headless mode, not old `--headless`)
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-006
|
||||||
|
- Added browser lifecycle HTTP endpoints: GET /v1/browser/status, POST /v1/browser/start, POST /v1/browser/stop
|
||||||
|
- Added `From<BrowserProblem> for ApiError` conversion for error handling
|
||||||
|
- Added browser imports (`browser_errors::BrowserProblem`, `browser_types::*`) to router.rs
|
||||||
|
- Registered browser handler paths and schemas (BrowserState, BrowserStartRequest, BrowserStatusResponse) in OpenAPI derive
|
||||||
|
- Handler functions follow identical pattern to desktop start/stop/status with utoipa doc comments
|
||||||
|
- Files changed: router.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- Browser route handlers follow exact same pattern as desktop: State extractor, optional Json body, Result<Json<Response>, ApiError>
|
||||||
|
- `From<BrowserProblem> for ApiError` is needed before browser handlers can use `?` on BrowserProblem results
|
||||||
|
- OpenAPI registration requires both `paths(...)` entries for handlers and `schemas(...)` entries for types
|
||||||
|
- Browser routes placed after desktop stream routes but before `/agents` routes in the v1_router chain
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-007
|
||||||
|
- Added GET /v1/browser/cdp WebSocket upgrade endpoint for CDP proxy
|
||||||
|
- Added `browser_cdp_ws_session` bidirectional relay function (client ↔ Chromium CDP)
|
||||||
|
- Added `ensure_active()` and `cdp_ws_url()` methods to BrowserRuntime
|
||||||
|
- CDP WS URL discovered dynamically via `http://127.0.0.1:9222/json/version` (same as CdpClient::connect)
|
||||||
|
- Follows identical pattern to Neko signaling proxy (WebSocketUpgrade, tokio::select! relay loop)
|
||||||
|
- Route registered at `/browser/cdp` in v1_router and OpenAPI paths
|
||||||
|
- Files changed: router.rs, browser_runtime.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- WebSocket proxy pattern: handler validates precondition before upgrade, session function handles connection + relay
|
||||||
|
- CDP proxy is simpler than Neko proxy: no session cookie/auth needed, just discover WS URL and connect
|
||||||
|
- `ensure_active()` is a reusable guard method on BrowserRuntime for any handler that requires active browser
|
||||||
|
- `cdp_ws_url()` discovers the full WS URL including browser ID from `/json/version` endpoint
|
||||||
|
- The `futures::StreamExt` import for `.next()` on streams is already global in router.rs
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-008
|
||||||
|
- Added 5 browser navigation HTTP endpoints: POST /v1/browser/navigate, POST /v1/browser/back, POST /v1/browser/forward, POST /v1/browser/reload, POST /v1/browser/wait
|
||||||
|
- Added `get_page_info_via_cdp()` helper function for retrieving current URL and title via Runtime.evaluate
|
||||||
|
- Added `get_cdp()` method to BrowserRuntime returning `Arc<CdpClient>` for lock-free CDP access
|
||||||
|
- Changed `CdpClient::close()` from `close(self)` to `close(&self)` to support Arc wrapping
|
||||||
|
- Changed `cdp_client` field in BrowserRuntimeStateData from `Option<CdpClient>` to `Option<Arc<CdpClient>>`
|
||||||
|
- Registered all 5 routes in v1_router and OpenAPI paths/schemas (BrowserNavigateRequest, BrowserNavigateWaitUntil, BrowserPageInfo, BrowserReloadRequest, BrowserWaitRequest, BrowserWaitState, BrowserWaitResponse)
|
||||||
|
- Files changed: router.rs, browser_runtime.rs, browser_cdp.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- `with_cdp()` closure pattern has async lifetime issues: the `&CdpClient` reference from the closure cannot be borrowed across await points in the async block. Use `get_cdp()` which returns `Arc<CdpClient>` instead.
|
||||||
|
- CDP `Page.navigate` doesn't return HTTP status directly; check for `frameId` presence as success indicator
|
||||||
|
- CDP `Page.getNavigationHistory` + `Page.navigateToHistoryEntry` is the correct way to implement back/forward (not `Page.navigateHistory` which doesn't exist)
|
||||||
|
- `Runtime.evaluate` with `returnByValue: true` is the simplest way to get page info (URL, title) and check DOM state
|
||||||
|
- For the wait endpoint, polling with `Runtime.evaluate` is simpler and more reliable than MutationObserver for cross-connection CDP
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-009
|
||||||
|
- Implemented 4 browser tab management HTTP endpoints: GET /v1/browser/tabs, POST /v1/browser/tabs, POST /v1/browser/tabs/:tab_id/activate, DELETE /v1/browser/tabs/:tab_id
|
||||||
|
- GET lists tabs via `Target.getTargets` filtered to type "page", with active tab detection via `Page.getNavigationHistory` URL matching
|
||||||
|
- POST creates tabs via `Target.createTarget`, returns 201 with tab info
|
||||||
|
- POST activate uses `Target.activateTarget`, DELETE uses `Target.closeTarget`
|
||||||
|
- All routes registered in v1_router and OpenAPI paths/schemas
|
||||||
|
- Files changed: router.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- CDP `Target.getTargets` returns `targetInfos` array with objects containing `targetId`, `url`, `title`, `type`
|
||||||
|
- CDP `Target.createTarget` takes `{url}` and returns `{targetId}`
|
||||||
|
- CDP `Target.closeTarget` takes `{targetId}` and returns `{success: bool}`
|
||||||
|
- CDP `Target.activateTarget` takes `{targetId}` and returns empty result
|
||||||
|
- For 201 status code responses, return `(StatusCode, Json<T>)` tuple from the handler
|
||||||
|
- Active tab detection is tricky: `Page.getNavigationHistory` operates on the currently attached target, so matching by URL is an approximation
|
||||||
|
- Combined `get().post()` route registration works for same path with different HTTP methods
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-010
|
||||||
|
- Implemented GET /v1/browser/screenshot and GET /v1/browser/pdf endpoints
|
||||||
|
- Screenshot supports format (png/jpeg/webp), quality, fullPage, and selector query params
|
||||||
|
- PDF supports format (a4/letter/legal), landscape, printBackground, scale query params
|
||||||
|
- Both use CDP commands (Page.captureScreenshot, Page.printToPDF) and decode base64 response data
|
||||||
|
- Routes registered in v1_router and OpenAPI paths/schemas
|
||||||
|
- Files changed: router.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- CDP `Page.captureScreenshot` returns `{data: "base64-string"}` with format/quality/clip/captureBeyondViewport params
|
||||||
|
- CDP `Page.printToPDF` returns `{data: "base64-string"}` with paperWidth/paperHeight in inches, landscape, printBackground, scale params
|
||||||
|
- Paper sizes in inches: A4 = 8.27x11.69, Letter = 8.5x11, Legal = 8.5x14
|
||||||
|
- For binary response handlers, return `Result<Response, ApiError>` with `([(header::CONTENT_TYPE, content_type_str)], Bytes::from(bytes)).into_response()`
|
||||||
|
- `base64` crate already available as workspace dependency; use `base64::engine::general_purpose::STANDARD` for decoding CDP data
|
||||||
|
- For selector-based screenshot clips, use `Runtime.evaluate` to get bounding box via `getBoundingClientRect()` then pass as `clip` param
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-011
|
||||||
|
- Implemented 4 browser content extraction GET endpoints: /v1/browser/content, /v1/browser/markdown, /v1/browser/links, /v1/browser/snapshot
|
||||||
|
- GET /v1/browser/content: extracts outerHTML (full page or CSS-selector-targeted element) via Runtime.evaluate
|
||||||
|
- GET /v1/browser/markdown: strips nav/footer/aside/header elements, converts to Markdown via html2md crate
|
||||||
|
- GET /v1/browser/links: extracts all a[href] elements as {href, text} array via Runtime.evaluate with JSON.stringify
|
||||||
|
- GET /v1/browser/snapshot: returns text representation of accessibility tree via Accessibility.getFullAXTree, filtering out noise nodes (none, GenericContainer)
|
||||||
|
- Added html2md = "0.2" dependency to Cargo.toml
|
||||||
|
- Files changed: Cargo.toml, router.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- `html2md::parse_html()` is a simple single-function API for HTML-to-Markdown conversion
|
||||||
|
- CDP `Accessibility.getFullAXTree` returns `{nodes: [{role: {value}, name: {value}, ...}]}` - role and name are nested objects with `value` field
|
||||||
|
- For DOM extraction via CDP, use `Runtime.evaluate` with `returnByValue: true` and serialize complex results to JSON string in the expression, then deserialize in Rust
|
||||||
|
- When stripping DOM elements before extraction, clone the body first (`document.body.cloneNode(true)`) to avoid mutating the live page
|
||||||
|
- `BrowserContentQuery` selector uses `document.querySelector()` (first match); returns 404 if element not found
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-012
|
||||||
|
- Implemented POST /v1/browser/scrape and POST /v1/browser/execute endpoints
|
||||||
|
- POST /v1/browser/scrape: accepts `{selectors: Record<string,string>, url?}`, evaluates querySelectorAll for each selector, collects textContent, returns `{data, url, title}`
|
||||||
|
- POST /v1/browser/execute: accepts `{expression, awaitPromise?}`, runs Runtime.evaluate with returnByValue, checks for exceptionDetails, returns `{result, type}`
|
||||||
|
- Both routes registered in v1_router and OpenAPI paths/schemas
|
||||||
|
- Files changed: router.rs
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- For scrape, serialize the selectors map to JSON and embed in the JS expression so all selectors run in a single Runtime.evaluate call (avoids multiple CDP round trips)
|
||||||
|
- Runtime.evaluate `exceptionDetails` contains `exception.description` or `text` for error messages
|
||||||
|
- `returnByValue: true` returns the JS value directly; for complex objects, serialize to JSON string in JS and deserialize in Rust
|
||||||
|
- `awaitPromise: true` in Runtime.evaluate params makes CDP wait for Promise resolution
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-013
|
||||||
|
- Implemented 5 browser interaction POST endpoints: /v1/browser/click, /v1/browser/type, /v1/browser/select, /v1/browser/hover, /v1/browser/scroll
|
||||||
|
- POST /v1/browser/click: DOM.querySelector + DOM.getBoxModel to find element center, then Input.dispatchMouseEvent (mousePressed + mouseReleased) with button/clickCount support
|
||||||
|
- POST /v1/browser/type: DOM.querySelector + DOM.focus to focus element, optional clear via Runtime.evaluate, then Input.dispatchKeyEvent (keyDown + keyUp) per character with optional delay
|
||||||
|
- POST /v1/browser/select: Runtime.evaluate to set select element value and dispatch change event
|
||||||
|
- POST /v1/browser/hover: DOM.querySelector + DOM.getBoxModel + Input.dispatchMouseEvent (mouseMoved)
|
||||||
|
- POST /v1/browser/scroll: Runtime.evaluate with scrollBy() on window or specific element
|
||||||
|
- All return BrowserActionResponse { ok: true }
|
||||||
|
- All routes registered in v1_router and OpenAPI paths/schemas
|
||||||
|
- Files changed: router.rs, prd.json
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- CDP `DOM.getBoxModel` returns `{model: {content: [x1,y1,x2,y2,x3,y3,x4,y4]}}` - content is a flat array of 4 corner points, compute center by averaging x-coords and y-coords separately
|
||||||
|
- CDP `Input.dispatchMouseEvent` requires both mousePressed and mouseReleased for a complete click
|
||||||
|
- CDP `Input.dispatchKeyEvent` with type "keyDown" + "keyUp" and "text" field types individual characters
|
||||||
|
- For select/scroll, Runtime.evaluate is simpler and more reliable than CDP DOM commands since we can set .value directly and dispatch events
|
||||||
|
- Escape single quotes and backslashes in CSS selectors embedded in JS template strings
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-014
|
||||||
|
- Implemented POST /v1/browser/upload and POST /v1/browser/dialog endpoints
|
||||||
|
- POST /v1/browser/upload: DOM.enable → DOM.getDocument → DOM.querySelector → DOM.setFileInputFiles with file path array and nodeId
|
||||||
|
- POST /v1/browser/dialog: Page.handleJavaScriptDialog with accept boolean and optional promptText for prompt dialogs
|
||||||
|
- Both return BrowserActionResponse { ok: true }
|
||||||
|
- Routes registered in v1_router and OpenAPI paths/schemas (BrowserUploadRequest, BrowserDialogRequest)
|
||||||
|
- Files changed: router.rs, prd.json
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- CDP `DOM.setFileInputFiles` takes `{files: [path], nodeId}` - files is an array of file paths even for single file upload
|
||||||
|
- CDP `Page.handleJavaScriptDialog` takes `{accept: bool, promptText?: string}` - promptText only relevant for prompt() dialogs
|
||||||
|
- Upload handler follows same DOM.enable → DOM.getDocument → DOM.querySelector pattern as click/hover handlers
|
||||||
|
- Dialog handler is simpler - no DOM operations needed, just the Page domain command
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-03-17 - US-015
|
||||||
|
- Implemented GET /v1/browser/console and GET /v1/browser/network endpoints
|
||||||
|
- Added CDP event subscriptions in browser_runtime.rs start() method after CDP client connects:
|
||||||
|
- `Runtime.enable` + subscribe to `Runtime.consoleAPICalled` → populates console_messages ring buffer
|
||||||
|
- `Network.enable` + subscribe to `Network.requestWillBeSent` → creates network request entries
|
||||||
|
- Subscribe to `Network.responseReceived` → updates existing request entries with status, mimeType, encodedDataLength
|
||||||
|
- Added `request_id` field (internal, skip_serializing) to BrowserNetworkRequest for correlating request/response events
|
||||||
|
- GET /v1/browser/console: accepts level?, limit? query params; calls browser_runtime.console_messages()
|
||||||
|
- GET /v1/browser/network: accepts limit?, urlPattern? query params; calls browser_runtime.network_requests()
|
||||||
|
- Both handlers use ensure_active() guard before accessing data
|
||||||
|
- Routes and schemas registered in v1_router and OpenAPI
|
||||||
|
- Files changed: browser_runtime.rs, browser_types.rs, router.rs, prd.json
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
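- CDP `Runtime.consoleAPICalled` params: `{type, args: [{value?, description?}], stackTrace?: {callFrames: [{url, lineNumber}]}, timestamp}` - the handler treats timestamp as seconds since epoch and multiplies by 1000 for millis; NOTE(review): the CDP docs define `Runtime.Timestamp` as milliseconds since epoch, so confirm the unit before relying on this conversion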
|
||||||
|
- CDP `Network.requestWillBeSent` params: `{requestId, request: {url, method}, timestamp}` - requestId is used to correlate with responseReceived
|
||||||
|
- CDP `Network.responseReceived` params: `{requestId, response: {status, mimeType, encodedDataLength}}` - find matching request in ring buffer by requestId
|
||||||
|
- Background tokio::spawn tasks for event processing don't need explicit cleanup; they terminate when CDP subscription channel closes (on browser stop)
|
||||||
|
- Added internal `request_id` field with `#[serde(skip_serializing)]` to keep it out of API responses while enabling request/response correlation
|
||||||
|
---
|
||||||
|
|
@ -288,7 +288,170 @@ impl BrowserRuntime {
|
||||||
// Connect CDP client
|
// Connect CDP client
|
||||||
match CdpClient::connect().await {
|
match CdpClient::connect().await {
|
||||||
Ok(client) => {
|
Ok(client) => {
|
||||||
state.cdp_client = Some(Arc::new(client));
|
let cdp = Arc::new(client);
|
||||||
|
state.cdp_client = Some(cdp.clone());
|
||||||
|
|
||||||
|
// Enable Runtime and Network domains for event monitoring
|
||||||
|
let _ = cdp.send("Runtime.enable", None).await;
|
||||||
|
let _ = cdp.send("Network.enable", None).await;
|
||||||
|
|
||||||
|
// Subscribe to console events and populate ring buffer
|
||||||
|
let console_rx = cdp.subscribe("Runtime.consoleAPICalled").await;
|
||||||
|
let inner_clone = self.inner.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut rx = console_rx;
|
||||||
|
while let Some(params) = rx.recv().await {
|
||||||
|
let level = params
|
||||||
|
.get("type")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("log")
|
||||||
|
.to_string();
|
||||||
|
// CDP reports console.warn() with type "warning"; the type string is stored as-is, with no normalization
|
||||||
|
let args = params
|
||||||
|
.get("args")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_default();
|
||||||
|
let text = args
|
||||||
|
.iter()
|
||||||
|
.filter_map(|a| {
|
||||||
|
a.get("value")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.or_else(|| {
|
||||||
|
a.get("description")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" ");
|
||||||
|
let stack_trace = params.get("stackTrace");
|
||||||
|
let call_frame = stack_trace
|
||||||
|
.and_then(|st| st.get("callFrames"))
|
||||||
|
.and_then(|cf| cf.as_array())
|
||||||
|
.and_then(|cf| cf.first());
|
||||||
|
let url = call_frame
|
||||||
|
.and_then(|f| f.get("url"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
let line = call_frame
|
||||||
|
.and_then(|f| f.get("lineNumber"))
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|n| n as u32);
|
||||||
|
let timestamp = params
|
||||||
|
.get("timestamp")
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.map(|ts| {
|
||||||
|
chrono::DateTime::from_timestamp_millis((ts * 1000.0) as i64)
|
||||||
|
.map(|dt| dt.to_rfc3339())
|
||||||
|
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
|
||||||
|
|
||||||
|
let msg = BrowserConsoleMessage {
|
||||||
|
level,
|
||||||
|
text,
|
||||||
|
url,
|
||||||
|
line,
|
||||||
|
timestamp,
|
||||||
|
};
|
||||||
|
let mut state = inner_clone.lock().await;
|
||||||
|
if state.console_messages.len() >= MAX_CONSOLE_MESSAGES {
|
||||||
|
state.console_messages.pop_front();
|
||||||
|
}
|
||||||
|
state.console_messages.push_back(msg);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Subscribe to network request events and populate ring buffer
|
||||||
|
let request_rx = cdp.subscribe("Network.requestWillBeSent").await;
|
||||||
|
let response_rx = cdp.subscribe("Network.responseReceived").await;
|
||||||
|
let inner_clone2 = self.inner.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut rx = request_rx;
|
||||||
|
while let Some(params) = rx.recv().await {
|
||||||
|
let request_id = params
|
||||||
|
.get("requestId")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let request = params.get("request");
|
||||||
|
let url = request
|
||||||
|
.and_then(|r| r.get("url"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let method = request
|
||||||
|
.and_then(|r| r.get("method"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("GET")
|
||||||
|
.to_string();
|
||||||
|
let timestamp = params
|
||||||
|
.get("timestamp")
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.map(|ts| {
|
||||||
|
chrono::DateTime::from_timestamp_millis((ts * 1000.0) as i64)
|
||||||
|
.map(|dt| dt.to_rfc3339())
|
||||||
|
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
|
||||||
|
|
||||||
|
let net_req = BrowserNetworkRequest {
|
||||||
|
request_id: Some(request_id),
|
||||||
|
url,
|
||||||
|
method,
|
||||||
|
status: None,
|
||||||
|
mime_type: None,
|
||||||
|
response_size: None,
|
||||||
|
duration: None,
|
||||||
|
timestamp,
|
||||||
|
};
|
||||||
|
let mut state = inner_clone2.lock().await;
|
||||||
|
if state.network_requests.len() >= MAX_NETWORK_REQUESTS {
|
||||||
|
state.network_requests.pop_front();
|
||||||
|
}
|
||||||
|
state.network_requests.push_back(net_req);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Subscribe to network response events to update existing requests
|
||||||
|
let inner_clone3 = self.inner.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut rx = response_rx;
|
||||||
|
while let Some(params) = rx.recv().await {
|
||||||
|
let request_id = params
|
||||||
|
.get("requestId")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("");
|
||||||
|
let response = params.get("response");
|
||||||
|
let status = response
|
||||||
|
.and_then(|r| r.get("status"))
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|s| s as u16);
|
||||||
|
let mime_type = response
|
||||||
|
.and_then(|r| r.get("mimeType"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
let response_size = response
|
||||||
|
.and_then(|r| r.get("encodedDataLength"))
|
||||||
|
.and_then(|v| v.as_u64());
|
||||||
|
|
||||||
|
let mut state = inner_clone3.lock().await;
|
||||||
|
// Find the matching request and update it
|
||||||
|
if let Some(req) = state
|
||||||
|
.network_requests
|
||||||
|
.iter_mut()
|
||||||
|
.rev()
|
||||||
|
.find(|r| r.request_id.as_deref() == Some(request_id))
|
||||||
|
{
|
||||||
|
req.status = status;
|
||||||
|
req.mime_type = mime_type;
|
||||||
|
req.response_size = response_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
Err(problem) => {
|
Err(problem) => {
|
||||||
return Err(self.fail_start_locked(&mut state, problem).await);
|
return Err(self.fail_start_locked(&mut state, problem).await);
|
||||||
|
|
|
||||||
|
|
@ -426,6 +426,9 @@ pub struct BrowserNetworkQuery {
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct BrowserNetworkRequest {
|
pub struct BrowserNetworkRequest {
|
||||||
|
/// Internal CDP request ID for correlating request/response events.
|
||||||
|
#[serde(default, skip_serializing)]
|
||||||
|
pub request_id: Option<String>,
|
||||||
pub url: String,
|
pub url: String,
|
||||||
pub method: String,
|
pub method: String,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
|
|
||||||
|
|
@ -305,6 +305,8 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
|
||||||
.route("/browser/scroll", post(post_v1_browser_scroll))
|
.route("/browser/scroll", post(post_v1_browser_scroll))
|
||||||
.route("/browser/upload", post(post_v1_browser_upload))
|
.route("/browser/upload", post(post_v1_browser_upload))
|
||||||
.route("/browser/dialog", post(post_v1_browser_dialog))
|
.route("/browser/dialog", post(post_v1_browser_dialog))
|
||||||
|
.route("/browser/console", get(get_v1_browser_console))
|
||||||
|
.route("/browser/network", get(get_v1_browser_network))
|
||||||
.route("/agents", get(get_v1_agents))
|
.route("/agents", get(get_v1_agents))
|
||||||
.route("/agents/:agent", get(get_v1_agent))
|
.route("/agents/:agent", get(get_v1_agent))
|
||||||
.route("/agents/:agent/install", post(post_v1_agent_install))
|
.route("/agents/:agent/install", post(post_v1_agent_install))
|
||||||
|
|
@ -520,6 +522,8 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
|
||||||
post_v1_browser_scroll,
|
post_v1_browser_scroll,
|
||||||
post_v1_browser_upload,
|
post_v1_browser_upload,
|
||||||
post_v1_browser_dialog,
|
post_v1_browser_dialog,
|
||||||
|
get_v1_browser_console,
|
||||||
|
get_v1_browser_network,
|
||||||
get_v1_agents,
|
get_v1_agents,
|
||||||
get_v1_agent,
|
get_v1_agent,
|
||||||
post_v1_agent_install,
|
post_v1_agent_install,
|
||||||
|
|
@ -625,6 +629,12 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
|
||||||
BrowserScrollRequest,
|
BrowserScrollRequest,
|
||||||
BrowserUploadRequest,
|
BrowserUploadRequest,
|
||||||
BrowserDialogRequest,
|
BrowserDialogRequest,
|
||||||
|
BrowserConsoleQuery,
|
||||||
|
BrowserConsoleMessage,
|
||||||
|
BrowserConsoleResponse,
|
||||||
|
BrowserNetworkQuery,
|
||||||
|
BrowserNetworkRequest,
|
||||||
|
BrowserNetworkResponse,
|
||||||
DesktopClipboardResponse,
|
DesktopClipboardResponse,
|
||||||
DesktopClipboardQuery,
|
DesktopClipboardQuery,
|
||||||
DesktopClipboardWriteRequest,
|
DesktopClipboardWriteRequest,
|
||||||
|
|
@ -2620,6 +2630,60 @@ async fn post_v1_browser_dialog(
|
||||||
Ok(Json(BrowserActionResponse { ok: true }))
|
Ok(Json(BrowserActionResponse { ok: true }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get browser console messages.
|
||||||
|
///
|
||||||
|
/// Returns console messages captured from the browser, optionally filtered by
|
||||||
|
/// level (log, debug, info, warning, error) and limited in count.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/v1/browser/console",
|
||||||
|
tag = "v1",
|
||||||
|
params(BrowserConsoleQuery),
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Console messages retrieved", body = BrowserConsoleResponse),
|
||||||
|
(status = 409, description = "Browser not active", body = ProblemDetails),
|
||||||
|
(status = 500, description = "Internal error", body = ProblemDetails)
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn get_v1_browser_console(
|
||||||
|
State(state): State<Arc<AppState>>,
|
||||||
|
Query(query): Query<BrowserConsoleQuery>,
|
||||||
|
) -> Result<Json<BrowserConsoleResponse>, ApiError> {
|
||||||
|
state.browser_runtime().ensure_active().await?;
|
||||||
|
let messages = state
|
||||||
|
.browser_runtime()
|
||||||
|
.console_messages(query.level.as_deref(), query.limit)
|
||||||
|
.await;
|
||||||
|
Ok(Json(BrowserConsoleResponse { messages }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get browser network requests.
|
||||||
|
///
|
||||||
|
/// Returns network requests captured from the browser, optionally filtered by
|
||||||
|
/// URL pattern and limited in count.
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/v1/browser/network",
|
||||||
|
tag = "v1",
|
||||||
|
params(BrowserNetworkQuery),
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Network requests retrieved", body = BrowserNetworkResponse),
|
||||||
|
(status = 409, description = "Browser not active", body = ProblemDetails),
|
||||||
|
(status = 500, description = "Internal error", body = ProblemDetails)
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn get_v1_browser_network(
|
||||||
|
State(state): State<Arc<AppState>>,
|
||||||
|
Query(query): Query<BrowserNetworkQuery>,
|
||||||
|
) -> Result<Json<BrowserNetworkResponse>, ApiError> {
|
||||||
|
state.browser_runtime().ensure_active().await?;
|
||||||
|
let requests = state
|
||||||
|
.browser_runtime()
|
||||||
|
.network_requests(query.url_pattern.as_deref(), query.limit)
|
||||||
|
.await;
|
||||||
|
Ok(Json(BrowserNetworkResponse { requests }))
|
||||||
|
}
|
||||||
|
|
||||||
/// Helper: get the current page URL and title via CDP Runtime.evaluate.
|
/// Helper: get the current page URL and title via CDP Runtime.evaluate.
|
||||||
async fn get_page_info_via_cdp(
|
async fn get_page_info_via_cdp(
|
||||||
cdp: &crate::browser_cdp::CdpClient,
|
cdp: &crate::browser_cdp::CdpClient,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue