mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-20 20:01:13 +00:00
feat: [US-030] - Fix crawl page load: replace sleep with readyState polling
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b582555b7c
commit
117b9c2c42
1 changed file with 30 additions and 2 deletions
|
|
@@ -57,8 +57,36 @@ pub async fn crawl_pages(
|
||||||
|
|
||||||
let status = nav_result.get("frameId").map(|_| 200u16);
|
let status = nav_result.get("frameId").map(|_| 200u16);
|
||||||
|
|
||||||
// Wait for load.
|
// Wait for page load by polling document.readyState until "complete".
|
||||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
// Polls every 100ms with a 10s timeout; proceeds with extraction if timeout reached.
|
||||||
|
let poll_interval = std::time::Duration::from_millis(100);
|
||||||
|
let load_timeout = std::time::Duration::from_secs(10);
|
||||||
|
let start_time = std::time::Instant::now();
|
||||||
|
loop {
|
||||||
|
if start_time.elapsed() >= load_timeout {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let ready_result = cdp
|
||||||
|
.send(
|
||||||
|
"Runtime.evaluate",
|
||||||
|
Some(serde_json::json!({
|
||||||
|
"expression": "document.readyState",
|
||||||
|
"returnByValue": true
|
||||||
|
})),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
if let Ok(val) = ready_result {
|
||||||
|
let state = val
|
||||||
|
.get("result")
|
||||||
|
.and_then(|r| r.get("value"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("");
|
||||||
|
if state == "complete" {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokio::time::sleep(poll_interval).await;
|
||||||
|
}
|
||||||
|
|
||||||
// Get page info.
|
// Get page info.
|
||||||
let (page_url, title) = get_page_info(cdp).await?;
|
let (page_url, title) = get_page_info(cdp).await?;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue