feat: [US-011] - Add browser content extraction endpoints (HTML, markdown, links, snapshot)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nathan Flurry 2026-03-17 05:32:21 -07:00
parent 45258c32b0
commit 1ae732d5b6
2 changed files with 246 additions and 0 deletions

View file

@ -42,6 +42,7 @@ toml_edit.workspace = true
tar.workspace = true tar.workspace = true
zip.workspace = true zip.workspace = true
tokio-tungstenite = "0.24" tokio-tungstenite = "0.24"
html2md = "0.2"
tempfile = { workspace = true, optional = true } tempfile = { workspace = true, optional = true }
[target.'cfg(unix)'.dependencies] [target.'cfg(unix)'.dependencies]

View file

@ -292,6 +292,10 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
.route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab)) .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab))
.route("/browser/screenshot", get(get_v1_browser_screenshot)) .route("/browser/screenshot", get(get_v1_browser_screenshot))
.route("/browser/pdf", get(get_v1_browser_pdf)) .route("/browser/pdf", get(get_v1_browser_pdf))
.route("/browser/content", get(get_v1_browser_content))
.route("/browser/markdown", get(get_v1_browser_markdown))
.route("/browser/links", get(get_v1_browser_links))
.route("/browser/snapshot", get(get_v1_browser_snapshot))
.route("/agents", get(get_v1_agents)) .route("/agents", get(get_v1_agents))
.route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent", get(get_v1_agent))
.route("/agents/:agent/install", post(post_v1_agent_install)) .route("/agents/:agent/install", post(post_v1_agent_install))
@ -494,6 +498,10 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
delete_v1_browser_tab, delete_v1_browser_tab,
get_v1_browser_screenshot, get_v1_browser_screenshot,
get_v1_browser_pdf, get_v1_browser_pdf,
get_v1_browser_content,
get_v1_browser_markdown,
get_v1_browser_links,
get_v1_browser_snapshot,
get_v1_agents, get_v1_agents,
get_v1_agent, get_v1_agent,
post_v1_agent_install, post_v1_agent_install,
@ -581,6 +589,12 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
BrowserScreenshotFormat, BrowserScreenshotFormat,
BrowserPdfQuery, BrowserPdfQuery,
BrowserPdfFormat, BrowserPdfFormat,
BrowserContentQuery,
BrowserContentResponse,
BrowserMarkdownResponse,
BrowserLinkInfo,
BrowserLinksResponse,
BrowserSnapshotResponse,
DesktopClipboardResponse, DesktopClipboardResponse,
DesktopClipboardQuery, DesktopClipboardQuery,
DesktopClipboardWriteRequest, DesktopClipboardWriteRequest,
@ -1663,6 +1677,237 @@ async fn get_v1_browser_pdf(
.into_response()) .into_response())
} }
/// Get the HTML content of the current browser page.
///
/// Returns the outerHTML of the page or a specific element selected by a CSS
/// selector, along with the current URL and title.
#[utoipa::path(
get,
path = "/v1/browser/content",
tag = "v1",
params(BrowserContentQuery),
responses(
(status = 200, description = "Page HTML content", body = BrowserContentResponse),
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
(status = 502, description = "CDP command failed", body = ProblemDetails)
)
)]
async fn get_v1_browser_content(
State(state): State<Arc<AppState>>,
Query(query): Query<BrowserContentQuery>,
) -> Result<Json<BrowserContentResponse>, ApiError> {
let cdp = state.browser_runtime().get_cdp().await?;
let (url, title) = get_page_info_via_cdp(&cdp).await?;
let expression = if let Some(ref selector) = query.selector {
let escaped = selector.replace('\\', "\\\\").replace('\'', "\\'");
format!(
"(function() {{ var el = document.querySelector('{}'); return el ? el.outerHTML : null; }})()",
escaped
)
} else {
"document.documentElement.outerHTML".to_string()
};
let result = cdp
.send(
"Runtime.evaluate",
Some(serde_json::json!({
"expression": expression,
"returnByValue": true
})),
)
.await?;
let html = result
.get("result")
.and_then(|r| r.get("value"))
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
if query.selector.is_some() && html.is_empty() {
return Err(BrowserProblem::not_found(&format!(
"Element not found: {}",
query.selector.as_deref().unwrap_or("")
))
.into());
}
Ok(Json(BrowserContentResponse { html, url, title }))
}
/// Get the page content as Markdown.
///
/// Extracts the DOM HTML via CDP, strips navigation/footer/aside elements, and
/// converts the remaining content to Markdown using html2md.
#[utoipa::path(
get,
path = "/v1/browser/markdown",
tag = "v1",
responses(
(status = 200, description = "Page content as Markdown", body = BrowserMarkdownResponse),
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
(status = 502, description = "CDP command failed", body = ProblemDetails)
)
)]
async fn get_v1_browser_markdown(
State(state): State<Arc<AppState>>,
) -> Result<Json<BrowserMarkdownResponse>, ApiError> {
let cdp = state.browser_runtime().get_cdp().await?;
let (url, title) = get_page_info_via_cdp(&cdp).await?;
// Extract body HTML with nav/footer/aside stripped out
let expression = r#"
(function() {
var clone = document.body.cloneNode(true);
var selectors = ['nav', 'footer', 'aside', 'header', '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]'];
selectors.forEach(function(sel) {
clone.querySelectorAll(sel).forEach(function(el) { el.remove(); });
});
return clone.innerHTML;
})()
"#;
let result = cdp
.send(
"Runtime.evaluate",
Some(serde_json::json!({
"expression": expression,
"returnByValue": true
})),
)
.await?;
let html = result
.get("result")
.and_then(|r| r.get("value"))
.and_then(|v| v.as_str())
.unwrap_or("");
let markdown = html2md::parse_html(html);
Ok(Json(BrowserMarkdownResponse {
markdown,
url,
title,
}))
}
/// Get all links on the current page.
///
/// Extracts all anchor elements from the page via CDP and returns their href
/// and text content.
#[utoipa::path(
get,
path = "/v1/browser/links",
tag = "v1",
responses(
(status = 200, description = "Links on the page", body = BrowserLinksResponse),
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
(status = 502, description = "CDP command failed", body = ProblemDetails)
)
)]
async fn get_v1_browser_links(
State(state): State<Arc<AppState>>,
) -> Result<Json<BrowserLinksResponse>, ApiError> {
let cdp = state.browser_runtime().get_cdp().await?;
let (url, _title) = get_page_info_via_cdp(&cdp).await?;
let expression = r#"
(function() {
var links = [];
document.querySelectorAll('a[href]').forEach(function(a) {
links.push({ href: a.href, text: (a.textContent || '').trim() });
});
return JSON.stringify(links);
})()
"#;
let result = cdp
.send(
"Runtime.evaluate",
Some(serde_json::json!({
"expression": expression,
"returnByValue": true
})),
)
.await?;
let json_str = result
.get("result")
.and_then(|r| r.get("value"))
.and_then(|v| v.as_str())
.unwrap_or("[]");
let links: Vec<BrowserLinkInfo> = serde_json::from_str(json_str).unwrap_or_default();
Ok(Json(BrowserLinksResponse { links, url }))
}
/// Get an accessibility tree snapshot of the current page.
///
/// Returns a text representation of the page accessibility tree via CDP
/// `Accessibility.getFullAXTree`.
#[utoipa::path(
get,
path = "/v1/browser/snapshot",
tag = "v1",
responses(
(status = 200, description = "Accessibility tree snapshot", body = BrowserSnapshotResponse),
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
(status = 502, description = "CDP command failed", body = ProblemDetails)
)
)]
async fn get_v1_browser_snapshot(
State(state): State<Arc<AppState>>,
) -> Result<Json<BrowserSnapshotResponse>, ApiError> {
let cdp = state.browser_runtime().get_cdp().await?;
let (url, title) = get_page_info_via_cdp(&cdp).await?;
let result = cdp.send("Accessibility.getFullAXTree", None).await?;
// Format the AX tree into a readable text snapshot
let nodes = result
.get("nodes")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let mut snapshot = String::new();
for node in &nodes {
let role = node
.get("role")
.and_then(|r| r.get("value"))
.and_then(|v| v.as_str())
.unwrap_or("");
let name = node
.get("name")
.and_then(|n| n.get("value"))
.and_then(|v| v.as_str())
.unwrap_or("");
if role == "none" || role == "GenericContainer" || (role.is_empty() && name.is_empty()) {
continue;
}
if !snapshot.is_empty() {
snapshot.push('\n');
}
if name.is_empty() {
snapshot.push_str(role);
} else {
snapshot.push_str(&format!("{}: {}", role, name));
}
}
Ok(Json(BrowserSnapshotResponse {
snapshot,
url,
title,
}))
}
/// Helper: get the current page URL and title via CDP Runtime.evaluate. /// Helper: get the current page URL and title via CDP Runtime.evaluate.
async fn get_page_info_via_cdp( async fn get_page_info_via_cdp(
cdp: &crate::browser_cdp::CdpClient, cdp: &crate::browser_cdp::CdpClient,