mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 04:03:31 +00:00
feat: [US-010] - Add browser screenshot and PDF endpoints
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
58d7acaabe
commit
45258c32b0
1 changed files with 160 additions and 0 deletions
|
|
@ -290,6 +290,8 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
|
|||
post(post_v1_browser_tab_activate),
|
||||
)
|
||||
.route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab))
|
||||
.route("/browser/screenshot", get(get_v1_browser_screenshot))
|
||||
.route("/browser/pdf", get(get_v1_browser_pdf))
|
||||
.route("/agents", get(get_v1_agents))
|
||||
.route("/agents/:agent", get(get_v1_agent))
|
||||
.route("/agents/:agent/install", post(post_v1_agent_install))
|
||||
|
|
@ -490,6 +492,8 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
|
|||
post_v1_browser_tabs,
|
||||
post_v1_browser_tab_activate,
|
||||
delete_v1_browser_tab,
|
||||
get_v1_browser_screenshot,
|
||||
get_v1_browser_pdf,
|
||||
get_v1_agents,
|
||||
get_v1_agent,
|
||||
post_v1_agent_install,
|
||||
|
|
@ -573,6 +577,10 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
|
|||
BrowserTabListResponse,
|
||||
BrowserCreateTabRequest,
|
||||
BrowserActionResponse,
|
||||
BrowserScreenshotQuery,
|
||||
BrowserScreenshotFormat,
|
||||
BrowserPdfQuery,
|
||||
BrowserPdfFormat,
|
||||
DesktopClipboardResponse,
|
||||
DesktopClipboardQuery,
|
||||
DesktopClipboardWriteRequest,
|
||||
|
|
@ -1503,6 +1511,158 @@ async fn delete_v1_browser_tab(
|
|||
Ok(Json(BrowserActionResponse { ok: true }))
|
||||
}
|
||||
|
||||
/// Capture a browser page screenshot.
|
||||
///
|
||||
/// Captures a screenshot of the current browser page via CDP
|
||||
/// `Page.captureScreenshot` and returns the image bytes with the appropriate
|
||||
/// Content-Type header.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/browser/screenshot",
|
||||
tag = "v1",
|
||||
params(BrowserScreenshotQuery),
|
||||
responses(
|
||||
(status = 200, description = "Browser screenshot as image bytes"),
|
||||
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
|
||||
(status = 502, description = "CDP command failed", body = ProblemDetails)
|
||||
)
|
||||
)]
|
||||
async fn get_v1_browser_screenshot(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Query(query): Query<BrowserScreenshotQuery>,
|
||||
) -> Result<Response, ApiError> {
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_ENGINE;
|
||||
use base64::Engine;
|
||||
|
||||
let cdp = state.browser_runtime().get_cdp().await?;
|
||||
|
||||
let fmt = query.format.unwrap_or(BrowserScreenshotFormat::Png);
|
||||
let cdp_format = match fmt {
|
||||
BrowserScreenshotFormat::Png => "png",
|
||||
BrowserScreenshotFormat::Jpeg => "jpeg",
|
||||
BrowserScreenshotFormat::Webp => "webp",
|
||||
};
|
||||
|
||||
let mut params = serde_json::json!({ "format": cdp_format });
|
||||
if let Some(quality) = query.quality {
|
||||
params["quality"] = serde_json::json!(quality);
|
||||
}
|
||||
if query.full_page.unwrap_or(false) {
|
||||
params["captureBeyondViewport"] = serde_json::json!(true);
|
||||
}
|
||||
if let Some(ref selector) = query.selector {
|
||||
// Resolve element bounding box for clip region
|
||||
let js = format!(
|
||||
r#"(() => {{
|
||||
const el = document.querySelector({selector});
|
||||
if (!el) return null;
|
||||
const r = el.getBoundingClientRect();
|
||||
return {{ x: r.x, y: r.y, width: r.width, height: r.height }};
|
||||
}})()"#,
|
||||
selector = serde_json::to_string(selector).unwrap_or_default()
|
||||
);
|
||||
let eval_result = cdp
|
||||
.send(
|
||||
"Runtime.evaluate",
|
||||
Some(serde_json::json!({
|
||||
"expression": js,
|
||||
"returnByValue": true
|
||||
})),
|
||||
)
|
||||
.await?;
|
||||
if let Some(value) = eval_result.get("result").and_then(|r| r.get("value")) {
|
||||
if !value.is_null() {
|
||||
params["clip"] = serde_json::json!({
|
||||
"x": value.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
"y": value.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
"width": value.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
"height": value.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
"scale": 1
|
||||
});
|
||||
} else {
|
||||
return Err(BrowserProblem::invalid_selector(&format!(
|
||||
"No element matches selector: {}",
|
||||
selector
|
||||
))
|
||||
.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let result = cdp.send("Page.captureScreenshot", Some(params)).await?;
|
||||
|
||||
let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let bytes = BASE64_ENGINE
|
||||
.decode(data_b64)
|
||||
.map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode screenshot: {}", e)))?;
|
||||
|
||||
let content_type = match fmt {
|
||||
BrowserScreenshotFormat::Png => "image/png",
|
||||
BrowserScreenshotFormat::Jpeg => "image/jpeg",
|
||||
BrowserScreenshotFormat::Webp => "image/webp",
|
||||
};
|
||||
|
||||
Ok(([(header::CONTENT_TYPE, content_type)], Bytes::from(bytes)).into_response())
|
||||
}
|
||||
|
||||
/// Generate a PDF of the current browser page.
|
||||
///
|
||||
/// Generates a PDF document from the current page via CDP `Page.printToPDF`
|
||||
/// and returns the PDF bytes.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/browser/pdf",
|
||||
tag = "v1",
|
||||
params(BrowserPdfQuery),
|
||||
responses(
|
||||
(status = 200, description = "Browser page as PDF bytes"),
|
||||
(status = 409, description = "Browser runtime is not active", body = ProblemDetails),
|
||||
(status = 502, description = "CDP command failed", body = ProblemDetails)
|
||||
)
|
||||
)]
|
||||
async fn get_v1_browser_pdf(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Query(query): Query<BrowserPdfQuery>,
|
||||
) -> Result<Response, ApiError> {
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_ENGINE;
|
||||
use base64::Engine;
|
||||
|
||||
let cdp = state.browser_runtime().get_cdp().await?;
|
||||
|
||||
let (paper_width, paper_height) = match query.format.unwrap_or(BrowserPdfFormat::Letter) {
|
||||
BrowserPdfFormat::A4 => (8.27_f64, 11.69_f64),
|
||||
BrowserPdfFormat::Letter => (8.5_f64, 11.0_f64),
|
||||
BrowserPdfFormat::Legal => (8.5_f64, 14.0_f64),
|
||||
};
|
||||
|
||||
let mut params = serde_json::json!({
|
||||
"paperWidth": paper_width,
|
||||
"paperHeight": paper_height,
|
||||
});
|
||||
if let Some(landscape) = query.landscape {
|
||||
params["landscape"] = serde_json::json!(landscape);
|
||||
}
|
||||
if let Some(print_background) = query.print_background {
|
||||
params["printBackground"] = serde_json::json!(print_background);
|
||||
}
|
||||
if let Some(scale) = query.scale {
|
||||
params["scale"] = serde_json::json!(scale);
|
||||
}
|
||||
|
||||
let result = cdp.send("Page.printToPDF", Some(params)).await?;
|
||||
|
||||
let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let bytes = BASE64_ENGINE
|
||||
.decode(data_b64)
|
||||
.map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode PDF: {}", e)))?;
|
||||
|
||||
Ok((
|
||||
[(header::CONTENT_TYPE, "application/pdf")],
|
||||
Bytes::from(bytes),
|
||||
)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
/// Helper: get the current page URL and title via CDP Runtime.evaluate.
|
||||
async fn get_page_info_via_cdp(
|
||||
cdp: &crate::browser_cdp::CdpClient,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue