From 1ae732d5b652ed8788c8ff4e9da66e320e08a42c Mon Sep 17 00:00:00 2001
From: Nathan Flurry
Date: Tue, 17 Mar 2026 05:32:21 -0700
Subject: [PATCH] feat: [US-011] - Add browser content extraction endpoints (HTML, markdown, links, snapshot)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 server/packages/sandbox-agent/Cargo.toml    |   1 +
 server/packages/sandbox-agent/src/router.rs | 245 ++++++++++++++++++++
 2 files changed, 246 insertions(+)

diff --git a/server/packages/sandbox-agent/Cargo.toml b/server/packages/sandbox-agent/Cargo.toml
index 8749a22..1fe04cf 100644
--- a/server/packages/sandbox-agent/Cargo.toml
+++ b/server/packages/sandbox-agent/Cargo.toml
@@ -42,6 +42,7 @@ toml_edit.workspace = true
 tar.workspace = true
 zip.workspace = true
 tokio-tungstenite = "0.24"
+html2md = "0.2"
 tempfile = { workspace = true, optional = true }
 
 [target.'cfg(unix)'.dependencies]
diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs
index 313a878..4040314 100644
--- a/server/packages/sandbox-agent/src/router.rs
+++ b/server/packages/sandbox-agent/src/router.rs
@@ -292,6 +292,10 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
         .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab))
         .route("/browser/screenshot", get(get_v1_browser_screenshot))
         .route("/browser/pdf", get(get_v1_browser_pdf))
+        .route("/browser/content", get(get_v1_browser_content))
+        .route("/browser/markdown", get(get_v1_browser_markdown))
+        .route("/browser/links", get(get_v1_browser_links))
+        .route("/browser/snapshot", get(get_v1_browser_snapshot))
         .route("/agents", get(get_v1_agents))
         .route("/agents/:agent", get(get_v1_agent))
         .route("/agents/:agent/install", post(post_v1_agent_install))
@@ -494,6 +498,10 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
         delete_v1_browser_tab,
         get_v1_browser_screenshot,
         get_v1_browser_pdf,
+        get_v1_browser_content,
+        get_v1_browser_markdown,
+        get_v1_browser_links,
+        get_v1_browser_snapshot,
         get_v1_agents,
         get_v1_agent,
         post_v1_agent_install,
@@ -581,6 +589,12 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
         BrowserScreenshotFormat,
         BrowserPdfQuery,
         BrowserPdfFormat,
+        BrowserContentQuery,
+        BrowserContentResponse,
+        BrowserMarkdownResponse,
+        BrowserLinkInfo,
+        BrowserLinksResponse,
+        BrowserSnapshotResponse,
         DesktopClipboardResponse,
         DesktopClipboardQuery,
         DesktopClipboardWriteRequest,
@@ -1663,6 +1677,237 @@ async fn get_v1_browser_pdf(
         .into_response())
 }
 
+/// Get the HTML content of the current browser page.
+///
+/// Returns the outerHTML of the page, or of the first element matching the CSS
+/// `selector` (not-found error if none matches), plus the current URL and title.
+#[utoipa::path(
+    get,
+    path = "/v1/browser/content",
+    tag = "v1",
+    params(BrowserContentQuery),
+    responses(
+        (status = 200, description = "Page HTML content", body = BrowserContentResponse),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn get_v1_browser_content(
+    State(state): State<Arc<AppState>>,
+    Query(query): Query<BrowserContentQuery>,
+) -> Result<Json<BrowserContentResponse>, ApiError> {
+    let cdp = state.browser_runtime().get_cdp().await?;
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+
+    let expression = if let Some(ref selector) = query.selector {
+        let literal = serde_json::json!(selector).to_string();
+        format!(
+            "(function() {{ var el = document.querySelector({}); return el ? el.outerHTML : null; }})()",
+            literal
+        )
+    } else {
+        "document.documentElement.outerHTML".to_string()
+    };
+
+    let result = cdp
+        .send(
+            "Runtime.evaluate",
+            Some(serde_json::json!({
+                "expression": expression,
+                "returnByValue": true
+            })),
+        )
+        .await?;
+
+    let html = result
+        .get("result")
+        .and_then(|r| r.get("value"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string();
+
+    if query.selector.is_some() && html.is_empty() {
+        return Err(BrowserProblem::not_found(&format!(
+            "Element not found: {}",
+            query.selector.as_deref().unwrap_or("")
+        ))
+        .into());
+    }
+
+    Ok(Json(BrowserContentResponse { html, url, title }))
+}
+
+/// Get the page content as Markdown.
+///
+/// Extracts the body HTML via CDP, strips nav/header/footer/aside and ARIA
+/// landmark elements, and converts the remaining content to Markdown using html2md.
+#[utoipa::path(
+    get,
+    path = "/v1/browser/markdown",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Page content as Markdown", body = BrowserMarkdownResponse),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn get_v1_browser_markdown(
+    State(state): State<Arc<AppState>>,
+) -> Result<Json<BrowserMarkdownResponse>, ApiError> {
+    let cdp = state.browser_runtime().get_cdp().await?;
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+
+    // Extract body HTML with nav/header/footer/aside and landmark roles stripped out
+    let expression = r#"
+        (function() {
+            var clone = document.body.cloneNode(true);
+            var selectors = ['nav', 'footer', 'aside', 'header', '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]'];
+            selectors.forEach(function(sel) {
+                clone.querySelectorAll(sel).forEach(function(el) { el.remove(); });
+            });
+            return clone.innerHTML;
+        })()
+    "#;
+
+    let result = cdp
+        .send(
+            "Runtime.evaluate",
+            Some(serde_json::json!({
+                "expression": expression,
+                "returnByValue": true
+            })),
+        )
+        .await?;
+
+    let html = result
+        .get("result")
+        .and_then(|r| r.get("value"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("");
+
+    let markdown = html2md::parse_html(html);
+
+    Ok(Json(BrowserMarkdownResponse {
+        markdown,
+        url,
+        title,
+    }))
+}
+
+/// Get all links on the current page.
+///
+/// Extracts all anchor elements from the page via CDP and returns their href
+/// and text content.
+#[utoipa::path(
+    get,
+    path = "/v1/browser/links",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Links on the page", body = BrowserLinksResponse),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn get_v1_browser_links(
+    State(state): State<Arc<AppState>>,
+) -> Result<Json<BrowserLinksResponse>, ApiError> {
+    let cdp = state.browser_runtime().get_cdp().await?;
+    let (url, _title) = get_page_info_via_cdp(&cdp).await?;
+
+    let expression = r#"
+        (function() {
+            var links = [];
+            document.querySelectorAll('a[href]').forEach(function(a) {
+                links.push({ href: a.href, text: (a.textContent || '').trim() });
+            });
+            return JSON.stringify(links);
+        })()
+    "#;
+
+    let result = cdp
+        .send(
+            "Runtime.evaluate",
+            Some(serde_json::json!({
+                "expression": expression,
+                "returnByValue": true
+            })),
+        )
+        .await?;
+
+    let json_str = result
+        .get("result")
+        .and_then(|r| r.get("value"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("[]");
+
+    let links: Vec<BrowserLinkInfo> = serde_json::from_str(json_str).unwrap_or_default();
+
+    Ok(Json(BrowserLinksResponse { links, url }))
+}
+
+/// Get an accessibility tree snapshot of the current page.
+///
+/// Returns a text representation of the page accessibility tree via CDP
+/// `Accessibility.getFullAXTree`.
+#[utoipa::path(
+    get,
+    path = "/v1/browser/snapshot",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Accessibility tree snapshot", body = BrowserSnapshotResponse),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn get_v1_browser_snapshot(
+    State(state): State<Arc<AppState>>,
+) -> Result<Json<BrowserSnapshotResponse>, ApiError> {
+    let cdp = state.browser_runtime().get_cdp().await?;
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+
+    let result = cdp.send("Accessibility.getFullAXTree", None).await?;
+
+    // Format the AX tree into one "role: name" line per node (borrowed, not cloned)
+    let nodes = result
+        .get("nodes")
+        .and_then(|v| v.as_array())
+        .map(|v| v.as_slice())
+        .unwrap_or_default();
+
+    let mut snapshot = String::new();
+    for node in nodes {
+        let role = node
+            .get("role")
+            .and_then(|r| r.get("value"))
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        let name = node
+            .get("name")
+            .and_then(|n| n.get("value"))
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+
+        if role == "none" || role == "GenericContainer" || (role.is_empty() && name.is_empty()) {
+            continue;
+        }
+
+        if !snapshot.is_empty() {
+            snapshot.push('\n');
+        }
+        if name.is_empty() {
+            snapshot.push_str(role);
+        } else {
+            snapshot.extend([role, ": ", name]);
+        }
+    }
+
+    Ok(Json(BrowserSnapshotResponse {
+        snapshot,
+        url,
+        title,
+    }))
+}
+
 /// Helper: get the current page URL and title via CDP Runtime.evaluate.
 async fn get_page_info_via_cdp(
     cdp: &crate::browser_cdp::CdpClient,