From 604239421d1f2aed37258aa16394c014abb1d727 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:18:36 -0700 Subject: [PATCH] feat: [US-008] - Add browser navigation endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- .../packages/sandbox-agent/src/browser_cdp.rs | 2 +- .../sandbox-agent/src/browser_runtime.rs | 23 +- server/packages/sandbox-agent/src/router.rs | 344 ++++++++++++++++++ 3 files changed, 364 insertions(+), 5 deletions(-) diff --git a/server/packages/sandbox-agent/src/browser_cdp.rs b/server/packages/sandbox-agent/src/browser_cdp.rs index cfea2ab..35b77ed 100644 --- a/server/packages/sandbox-agent/src/browser_cdp.rs +++ b/server/packages/sandbox-agent/src/browser_cdp.rs @@ -154,7 +154,7 @@ impl CdpClient { } /// Close the CDP connection and stop the reader task. - pub async fn close(self) { + pub async fn close(&self) { self.reader_task.abort(); let _ = self.ws_sender.lock().await.close().await; } diff --git a/server/packages/sandbox-agent/src/browser_runtime.rs b/server/packages/sandbox-agent/src/browser_runtime.rs index ccb29d7..250613d 100644 --- a/server/packages/sandbox-agent/src/browser_runtime.rs +++ b/server/packages/sandbox-agent/src/browser_runtime.rs @@ -74,7 +74,7 @@ struct BrowserRuntimeStateData { environment: HashMap, xvfb: Option, chromium: Option, - cdp_client: Option, + cdp_client: Option>, context_id: Option, streaming_config: Option, recording_fps: Option, @@ -288,7 +288,7 @@ impl BrowserRuntime { // Connect CDP client match CdpClient::connect().await { Ok(client) => { - state.cdp_client = Some(client); + state.cdp_client = Some(Arc::new(client)); } Err(problem) => { return Err(self.fail_start_locked(&mut state, problem).await); @@ -349,7 +349,7 @@ impl BrowserRuntime { self.write_runtime_log_locked(&state, "stopping browser runtime"); // Close CDP client - if let Some(cdp_client) = state.cdp_client.take() { + if let Some(ref cdp_client) = state.cdp_client.take() { cdp_client.close().await; } @@ -417,6 
+417,21 @@ impl BrowserRuntime { f(cdp).await } + /// Get an Arc-wrapped CDP client handle. + /// + /// Returns a cloned `Arc` after verifying the browser is active. + /// The caller can use the returned handle without holding the state lock. + pub async fn get_cdp(&self) -> Result, BrowserProblem> { + let state = self.inner.lock().await; + if state.state != BrowserState::Active { + return Err(BrowserProblem::not_active()); + } + state + .cdp_client + .clone() + .ok_or_else(|| BrowserProblem::cdp_error("CDP client is not connected")) + } + /// Ensure the browser runtime is active. /// /// Returns `BrowserProblem::NotActive` if the browser is not running. @@ -818,7 +833,7 @@ impl BrowserRuntime { self.write_runtime_log_locked(state, "browser runtime startup failed; cleaning up"); // Close CDP client if any - if let Some(cdp) = state.cdp_client.take() { + if let Some(ref cdp) = state.cdp_client.take() { cdp.close().await; } diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 04986e4..f8d55db 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -276,6 +276,11 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/start", post(post_v1_browser_start)) .route("/browser/stop", post(post_v1_browser_stop)) .route("/browser/cdp", get(get_v1_browser_cdp_ws)) + .route("/browser/navigate", post(post_v1_browser_navigate)) + .route("/browser/back", post(post_v1_browser_back)) + .route("/browser/forward", post(post_v1_browser_forward)) + .route("/browser/reload", post(post_v1_browser_reload)) + .route("/browser/wait", post(post_v1_browser_wait)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -467,6 +472,11 @@ pub async fn shutdown_servers(state: &Arc) { post_v1_browser_start, post_v1_browser_stop, get_v1_browser_cdp_ws, + 
post_v1_browser_navigate, + post_v1_browser_back, + post_v1_browser_forward, + post_v1_browser_reload, + post_v1_browser_wait, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -539,6 +549,13 @@ pub async fn shutdown_servers(state: &Arc) { BrowserState, BrowserStartRequest, BrowserStatusResponse, + BrowserNavigateRequest, + BrowserNavigateWaitUntil, + BrowserPageInfo, + BrowserReloadRequest, + BrowserWaitRequest, + BrowserWaitState, + BrowserWaitResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -921,6 +938,333 @@ async fn browser_cdp_ws_session(mut client_ws: WebSocket, browser_runtime: Arc>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + // Enable Page domain for lifecycle events + cdp.send("Page.enable", None).await?; + + let nav_result = cdp + .send( + "Page.navigate", + Some(serde_json::json!({ "url": body.url })), + ) + .await?; + + // Extract HTTP status from the navigation result if available + let status = nav_result + .get("errorText") + .and_then(|_| None::) + .or_else(|| { + // Page.navigate doesn't directly return HTTP status; + // we rely on frameId being present as a success signal + nav_result.get("frameId").map(|_| 200u16) + }); + + // Wait for the requested lifecycle event + match body.wait_until { + Some(BrowserNavigateWaitUntil::Load) | None => { + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } + Some(BrowserNavigateWaitUntil::Domcontentloaded) => { + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + Some(BrowserNavigateWaitUntil::Networkidle) => { + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + } + + // Get current page URL and title + let (url, title) = get_page_info_via_cdp(&cdp).await?; + Ok(Json(BrowserPageInfo { url, title, status })) +} + +/// Navigate the browser back in history. 
+///
+/// Sends a CDP `Page.navigateToHistoryEntry` command with the previous
+/// history entry and returns the resulting page URL and title.
+#[utoipa::path(
+    post,
+    path = "/v1/browser/back",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Page info after navigating back", body = BrowserPageInfo),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn post_v1_browser_back(
+    State(state): State>,
+) -> Result, ApiError> {
+    // Fetch the CDP client handle; `?` returns early with the error if
+    // `get_cdp` fails.
+    let cdp = state.browser_runtime().get_cdp().await?;
+
+    // Read the history list and the position of the current entry in it.
+    let history = cdp.send("Page.getNavigationHistory", None).await?;
+    // A missing or non-integer `currentIndex` is treated as 0, which makes
+    // the handler behave as if there is nothing to go back to.
+    let current_index = history
+        .get("currentIndex")
+        .and_then(|v| v.as_i64())
+        .unwrap_or(0);
+    // A missing or non-array `entries` becomes an empty list.
+    // NOTE(review): `.cloned()` copies the whole entries array although it is
+    // only read below; borrowing would avoid the copy.
+    let entries = history
+        .get("entries")
+        .and_then(|v| v.as_array())
+        .cloned()
+        .unwrap_or_default();
+
+    // Only navigate when a previous entry exists. The `> 0` guard also keeps
+    // `current_index - 1` non-negative before the cast to `usize`.
+    if current_index > 0 {
+        if let Some(entry) = entries.get((current_index - 1) as usize) {
+            // Entries without an integer `id` are silently skipped.
+            if let Some(entry_id) = entry.get("id").and_then(|v| v.as_i64()) {
+                cdp.send(
+                    "Page.navigateToHistoryEntry",
+                    Some(serde_json::json!({ "entryId": entry_id })),
+                )
+                .await?;
+                // Fixed 300 ms pause before reading page info; no CDP
+                // lifecycle event is awaited here.
+                tokio::time::sleep(std::time::Duration::from_millis(300)).await;
+            }
+        }
+    }
+
+    // Reached on every non-error path: when no navigation was issued, the
+    // response simply describes the page that is already showing.
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+    Ok(Json(BrowserPageInfo {
+        url,
+        title,
+        // History navigation never reports an HTTP status.
+        status: None,
+    }))
+}
+
+/// Navigate the browser forward in history.
+///
+/// Sends a CDP `Page.navigateToHistoryEntry` command with the next
+/// history entry and returns the resulting page URL and title.
+#[utoipa::path(
+    post,
+    path = "/v1/browser/forward",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Page info after navigating forward", body = BrowserPageInfo),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn post_v1_browser_forward(
+    State(state): State>,
+) -> Result, ApiError> {
+    // Fetch the CDP client handle; `?` returns early with the error if
+    // `get_cdp` fails.
+    let cdp = state.browser_runtime().get_cdp().await?;
+
+    // Read the history list and the position of the current entry in it.
+    let history = cdp.send("Page.getNavigationHistory", None).await?;
+    // A missing or non-integer `currentIndex` is treated as 0, so the
+    // "next" entry is then taken to be index 1.
+    let current_index = history
+        .get("currentIndex")
+        .and_then(|v| v.as_i64())
+        .unwrap_or(0);
+    // A missing or non-array `entries` becomes an empty list.
+    // NOTE(review): `.cloned()` copies the whole entries array although it is
+    // only read below; borrowing would avoid the copy.
+    let entries = history
+        .get("entries")
+        .and_then(|v| v.as_array())
+        .cloned()
+        .unwrap_or_default();
+
+    // Only navigate when an entry exists after the current one; with an
+    // empty list this condition is false and nothing is sent.
+    if (current_index + 1) < entries.len() as i64 {
+        if let Some(entry) = entries.get((current_index + 1) as usize) {
+            // Entries without an integer `id` are silently skipped.
+            if let Some(entry_id) = entry.get("id").and_then(|v| v.as_i64()) {
+                cdp.send(
+                    "Page.navigateToHistoryEntry",
+                    Some(serde_json::json!({ "entryId": entry_id })),
+                )
+                .await?;
+                // Fixed 300 ms pause before reading page info; no CDP
+                // lifecycle event is awaited here.
+                tokio::time::sleep(std::time::Duration::from_millis(300)).await;
+            }
+        }
+    }
+
+    // Reached on every non-error path: when no navigation was issued, the
+    // response simply describes the page that is already showing.
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+    Ok(Json(BrowserPageInfo {
+        url,
+        title,
+        // History navigation never reports an HTTP status.
+        status: None,
+    }))
+}
+
+/// Reload the current browser page.
+///
+/// Sends a CDP `Page.reload` command with an optional cache bypass flag
+/// and returns the resulting page URL and title.
+#[utoipa::path(
+    post,
+    path = "/v1/browser/reload",
+    tag = "v1",
+    request_body = BrowserReloadRequest,
+    responses(
+        (status = 200, description = "Page info after reload", body = BrowserPageInfo),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails)
+    )
+)]
+async fn post_v1_browser_reload(
+    State(state): State>,
+    Json(body): Json,
+) -> Result, ApiError> {
+    // Fetch the CDP client handle; `?` returns early with the error if
+    // `get_cdp` fails.
+    let cdp = state.browser_runtime().get_cdp().await?;
+
+    // An omitted `ignore_cache` means a normal (cache-using) reload.
+    let ignore_cache = body.ignore_cache.unwrap_or(false);
+    cdp.send(
+        "Page.reload",
+        Some(serde_json::json!({ "ignoreCache": ignore_cache })),
+    )
+    .await?;
+    // Fixed 500 ms pause before reading page info; no CDP lifecycle event is
+    // awaited here.
+    // NOTE(review): a slow page may still be loading when the URL and title
+    // are read below — confirm the fixed delay is acceptable.
+    tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+
+    let (url, title) = get_page_info_via_cdp(&cdp).await?;
+    Ok(Json(BrowserPageInfo {
+        url,
+        title,
+        // Reload never reports an HTTP status.
+        status: None,
+    }))
+}
+
+/// Wait for a selector or condition in the browser.
+///
+/// Polls the page DOM using `Runtime.evaluate` with a `querySelector` check
+/// until the element is found or the timeout expires.
+#[utoipa::path(
+    post,
+    path = "/v1/browser/wait",
+    tag = "v1",
+    request_body = BrowserWaitRequest,
+    responses(
+        (status = 200, description = "Wait result", body = BrowserWaitResponse),
+        (status = 409, description = "Browser runtime is not active", body = ProblemDetails),
+        (status = 502, description = "CDP command failed", body = ProblemDetails),
+        (status = 504, description = "Timeout waiting for condition", body = ProblemDetails)
+    )
+)]
+async fn post_v1_browser_wait(
+    State(state): State>,
+    Json(body): Json,
+) -> Result, ApiError> {
+    // Fetch the CDP client handle; `?` returns early with the error if
+    // `get_cdp` fails.
+    let cdp = state.browser_runtime().get_cdp().await?;
+
+    // Request defaults: 5000 ms timeout, the `body` selector, and the
+    // `Attached` state.
+    let timeout_ms = body.timeout.unwrap_or(5000);
+    let selector = body.selector.clone().unwrap_or_else(|| "body".to_string());
+    let wait_state = body.state.unwrap_or(BrowserWaitState::Attached);
+
+    // Build the JavaScript predicate evaluated on every poll. The selector is
+    // JSON-encoded so it is spliced into the script as a quoted string
+    // literal rather than as raw text.
+    // NOTE(review): if encoding ever failed, `unwrap_or_default()` would
+    // splice in an empty string and the script would call `querySelector()`
+    // with no argument.
+    let js_expression = match wait_state {
+        // True when the element exists and its own computed style is not
+        // `display: none`, `visibility: hidden`, or `opacity: 0`. Only the
+        // matched element's style is inspected.
+        BrowserWaitState::Visible => {
+            format!(
+                r#"(() => {{
+                    const el = document.querySelector({sel});
+                    if (!el) return false;
+                    const style = window.getComputedStyle(el);
+                    return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
+                }})()"#,
+                sel = serde_json::to_string(&selector).unwrap_or_default()
+            )
+        }
+        // Logical inverse of `Visible`; a missing element counts as hidden.
+        BrowserWaitState::Hidden => {
+            format!(
+                r#"(() => {{
+                    const el = document.querySelector({sel});
+                    if (!el) return true;
+                    const style = window.getComputedStyle(el);
+                    return style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0';
+                }})()"#,
+                sel = serde_json::to_string(&selector).unwrap_or_default()
+            )
+        }
+        // True as soon as the selector matches any element.
+        BrowserWaitState::Attached => {
+            format!(
+                "document.querySelector({sel}) !== null",
+                sel = serde_json::to_string(&selector).unwrap_or_default()
+            )
+        }
+    };
+
+    let start = tokio::time::Instant::now();
+    let timeout_dur = std::time::Duration::from_millis(timeout_ms);
+    let poll_interval = std::time::Duration::from_millis(100);
+
+    // Poll loop: the predicate is always evaluated at least once, even with a
+    // zero timeout, because the deadline is checked after the evaluation.
+    loop {
+        let eval_result = cdp
+            .send(
+                "Runtime.evaluate",
+                Some(serde_json::json!({
+                    "expression": js_expression,
+                    "returnByValue": true
+                })),
+            )
+            .await?;
+
+        // Anything other than a boolean `result.value` counts as "not found".
+        let found = eval_result
+            .get("result")
+            .and_then(|r| r.get("value"))
+            .and_then(|v| v.as_bool())
+            .unwrap_or(false);
+
+        if found {
+            return Ok(Json(BrowserWaitResponse { found: true }));
+        }
+
+        // NOTE(review): the OpenAPI annotation above advertises a 504 on
+        // timeout, but this path answers 200 with `found: false` — confirm
+        // which behavior is intended and align the other.
+        if start.elapsed() >= timeout_dur {
+            return Ok(Json(BrowserWaitResponse { found: false }));
+        }
+
+        tokio::time::sleep(poll_interval).await;
+    }
+}
+
+/// Helper: get the current page URL and title via CDP Runtime.evaluate.
+///
+/// Issues two separate `Runtime.evaluate` calls, one for
+/// `document.location.href` and one for `document.title`, and returns them
+/// as `(url, title)`.
+///
+/// A reply whose `result.value` is missing or not a string yields an empty
+/// string for that field rather than an error.
+///
+/// # Errors
+///
+/// Returns the error from `cdp.send` if either evaluation fails.
+async fn get_page_info_via_cdp(
+    cdp: &crate::browser_cdp::CdpClient,
+) -> Result<(String, String), BrowserProblem> {
+    // `returnByValue` asks for the value itself in `result.value`.
+    let url_result = cdp
+        .send(
+            "Runtime.evaluate",
+            Some(serde_json::json!({
+                "expression": "document.location.href",
+                "returnByValue": true
+            })),
+        )
+        .await?;
+    let url = url_result
+        .get("result")
+        .and_then(|r| r.get("value"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string();
+
+    let title_result = cdp
+        .send(
+            "Runtime.evaluate",
+            Some(serde_json::json!({
+                "expression": "document.title",
+                "returnByValue": true
+            })),
+        )
+        .await?;
+    let title = title_result
+        .get("result")
+        .and_then(|r| r.get("value"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string();
+
+    Ok((url, title))
+}
+
 /// Capture a full desktop screenshot.
 ///
 /// Performs a health-gated full-frame screenshot of the managed desktop and