From 45258c32b03c4e1e46786589241e7de235149946 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:28:06 -0700 Subject: [PATCH] feat: [US-010] - Add browser screenshot and PDF endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 160 ++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index f5b16de..313a878 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -290,6 +290,8 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) post(post_v1_browser_tab_activate), ) .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab)) + .route("/browser/screenshot", get(get_v1_browser_screenshot)) + .route("/browser/pdf", get(get_v1_browser_pdf)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -490,6 +492,8 @@ pub async fn shutdown_servers(state: &Arc) { post_v1_browser_tabs, post_v1_browser_tab_activate, delete_v1_browser_tab, + get_v1_browser_screenshot, + get_v1_browser_pdf, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -573,6 +577,10 @@ pub async fn shutdown_servers(state: &Arc) { BrowserTabListResponse, BrowserCreateTabRequest, BrowserActionResponse, + BrowserScreenshotQuery, + BrowserScreenshotFormat, + BrowserPdfQuery, + BrowserPdfFormat, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -1503,6 +1511,158 @@ async fn delete_v1_browser_tab( Ok(Json(BrowserActionResponse { ok: true })) } +/// Capture a browser page screenshot. +/// +/// Captures a screenshot of the current browser page via CDP +/// `Page.captureScreenshot` and returns the image bytes with the appropriate +/// Content-Type header. 
+#[utoipa::path( + get, + path = "/v1/browser/screenshot", + tag = "v1", + params(BrowserScreenshotQuery), + responses( + (status = 200, description = "Browser screenshot as image bytes"), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_screenshot( + State(state): State>, + Query(query): Query, +) -> Result { + use base64::engine::general_purpose::STANDARD as BASE64_ENGINE; + use base64::Engine; + + let cdp = state.browser_runtime().get_cdp().await?; + + let fmt = query.format.unwrap_or(BrowserScreenshotFormat::Png); /* PNG is used when the query does not specify a format */ + let cdp_format = match fmt { + BrowserScreenshotFormat::Png => "png", + BrowserScreenshotFormat::Jpeg => "jpeg", + BrowserScreenshotFormat::Webp => "webp", + }; + + let mut params = serde_json::json!({ "format": cdp_format }); + if let Some(quality) = query.quality { + params["quality"] = serde_json::json!(quality); + } + if query.full_page.unwrap_or(false) { + params["captureBeyondViewport"] = serde_json::json!(true); /* NOTE(review): full_page only sets captureBeyondViewport; no clip covering the whole document is computed here - confirm this yields a full-page image */ + } + if let Some(ref selector) = query.selector { + // Resolve element bounding box for clip region + let js = format!( + r#"(() => {{ + const el = document.querySelector({selector}); + if (!el) return null; + const r = el.getBoundingClientRect(); + return {{ x: r.x, y: r.y, width: r.width, height: r.height }}; + }})()"#, + selector = serde_json::to_string(selector).unwrap_or_default() /* the selector is JSON-encoded so it is embedded as a quoted JS string literal; if encoding fails this falls back to an empty string */ + ); + let eval_result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": js, + "returnByValue": true + })), + ) + .await?; + if let Some(value) = eval_result.get("result").and_then(|r| r.get("value")) { /* NOTE(review): when result.value is absent (presumably e.g. when the page script throws) this branch is skipped and the screenshot is taken without a clip - confirm that is intended */ + if !value.is_null() { + params["clip"] = serde_json::json!({ + "x": value.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0), + "y": value.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0), + "width": value.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0), + "height": 
value.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0), + "scale": 1 + }); + } else { /* the page script returned null, i.e. querySelector found no element */ + return Err(BrowserProblem::invalid_selector(&format!( + "No element matches selector: {}", + selector + )) + .into()); + } + } + } + + let result = cdp.send("Page.captureScreenshot", Some(params)).await?; + + let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or(""); /* a missing or non-string data field becomes an empty string here; NOTE(review): presumably that decodes to zero bytes and produces an empty 200 body - confirm */ + let bytes = BASE64_ENGINE + .decode(data_b64) + .map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode screenshot: {}", e)))?; + + let content_type = match fmt { + BrowserScreenshotFormat::Png => "image/png", + BrowserScreenshotFormat::Jpeg => "image/jpeg", + BrowserScreenshotFormat::Webp => "image/webp", + }; + + Ok(([(header::CONTENT_TYPE, content_type)], Bytes::from(bytes)).into_response()) +} + +/// Generate a PDF of the current browser page. +/// +/// Generates a PDF document from the current page via CDP `Page.printToPDF` +/// and returns the PDF bytes. +#[utoipa::path( + get, + path = "/v1/browser/pdf", + tag = "v1", + params(BrowserPdfQuery), + responses( + (status = 200, description = "Browser page as PDF bytes"), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_pdf( + State(state): State>, + Query(query): Query, +) -> Result { + use base64::engine::general_purpose::STANDARD as BASE64_ENGINE; + use base64::Engine; + + let cdp = state.browser_runtime().get_cdp().await?; + + let (paper_width, paper_height) = match query.format.unwrap_or(BrowserPdfFormat::Letter) { /* Letter is the default; each arm is a (width, height) pair, presumably in inches as Page.printToPDF paperWidth and paperHeight expect - confirm */ + BrowserPdfFormat::A4 => (8.27_f64, 11.69_f64), + BrowserPdfFormat::Letter => (8.5_f64, 11.0_f64), + BrowserPdfFormat::Legal => (8.5_f64, 14.0_f64), + }; + + let mut params = serde_json::json!({ + "paperWidth": paper_width, + "paperHeight": paper_height, + }); + if let Some(landscape) = query.landscape { + params["landscape"] = serde_json::json!(landscape); + } + if let 
Some(print_background) = query.print_background { + params["printBackground"] = serde_json::json!(print_background); + } + if let Some(scale) = query.scale { + params["scale"] = serde_json::json!(scale); /* landscape, printBackground and scale are forwarded only when the query supplies them */ + } + + let result = cdp.send("Page.printToPDF", Some(params)).await?; + + let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or(""); /* a missing or non-string data field becomes an empty string here; NOTE(review): presumably that decodes to zero bytes and produces an empty PDF body - confirm */ + let bytes = BASE64_ENGINE + .decode(data_b64) + .map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode PDF: {}", e)))?; + + Ok(( + [(header::CONTENT_TYPE, "application/pdf")], + Bytes::from(bytes), + ) + .into_response()) +} + /// Helper: get the current page URL and title via CDP Runtime.evaluate. async fn get_page_info_via_cdp( cdp: &crate::browser_cdp::CdpClient,