mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 04:03:31 +00:00
feat: [US-033] - Fix default display dimensions to match spec (1280x720)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c4eb48ce6a
commit
a6ba0ecee0
10 changed files with 2275 additions and 24 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -47,6 +47,7 @@ Cargo.lock
|
|||
.agents/
|
||||
.claude/
|
||||
.opencode/
|
||||
.ralph/
|
||||
|
||||
# Example temp files
|
||||
.tmp-upload/
|
||||
|
|
|
|||
148
docs/browser-feature-matrix.mdx
Normal file
148
docs/browser-feature-matrix.mdx
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
---
|
||||
title: "Feature Matrix"
|
||||
description: "Compare Sandbox Agent's capabilities against other sandbox and browser automation providers."
|
||||
sidebarTitle: "Feature Matrix"
|
||||
icon: "table-columns"
|
||||
---
|
||||
|
||||
A comparison of Sandbox Agent's features against Daytona, E2B, Cloudflare (Browser Rendering), Browserbase, and common agent-browser tools (Steel, Stagehand, Browser Use).
|
||||
|
||||
## Sandbox Lifecycle
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Create sandbox | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| Destroy/delete | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| List sandboxes | ✓ | ✓ | ✓ | - | ✓ |
|
||||
| Start/stop | ✓ | ✓ | ✓ | ✓ | - |
|
||||
| Pause/resume | - | - | ✓ | - | - |
|
||||
| Snapshots/templates | - | ✓ | ✓ | - | ✓ |
|
||||
| Auto-stop timeout | - | ✓ | ✓ | ✓ | ✓ |
|
||||
| Region selection | - | - | - | ✓ | ✓ |
|
||||
|
||||
## Filesystem
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Read file | ✓ | ✓ | ✓ | - | - |
|
||||
| Write file | ✓ | ✓ | ✓ | - | - |
|
||||
| List directory (recursive) | ✓ | ✓ | ✓ | - | - |
|
||||
| Delete file/dir | ✓ | - | ✓ | - | - |
|
||||
| Move/rename | ✓ | - | ✓ | - | - |
|
||||
| Mkdir | ✓ | - | ✓ | - | - |
|
||||
| File stat/metadata | ✓ | - | - | - | - |
|
||||
| Batch upload (tar) | ✓ | - | - | - | - |
|
||||
| File watch/events | - | - | ✓ | - | - |
|
||||
|
||||
## Process Management
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| One-shot exec | ✓ | ✓ | ✓ | - | - |
|
||||
| Background processes | ✓ | - | ✓ | - | - |
|
||||
| Stream stdout/stderr | ✓ | - | ✓ | - | - |
|
||||
| Interactive PTY (WebSocket) | ✓ | ✓ | ✓ | - | - |
|
||||
| Terminal resize | ✓ | ✓ | ✓ | - | - |
|
||||
| Send stdin | ✓ | - | ✓ | - | - |
|
||||
| Kill/stop process | ✓ | - | ✓ | - | - |
|
||||
| List processes | ✓ | - | ✓ | - | - |
|
||||
| Process config (limits) | ✓ | - | - | - | - |
|
||||
|
||||
## Desktop / Computer-Use
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Virtual desktop | ✓ | - | ✓ | - | - |
|
||||
| Screenshot (full) | ✓ | - | ✓ | ✓ | - |
|
||||
| Screenshot (region) | ✓ | - | - | - | - |
|
||||
| Mouse (move/click/drag/scroll) | ✓ | - | ✓ | - | - |
|
||||
| Keyboard (type/press) | ✓ | - | ✓ | - | - |
|
||||
| Window management | ✓ | - | - | - | - |
|
||||
| Clipboard read/write | ✓ | - | - | - | - |
|
||||
| Launch application | ✓ | - | - | - | - |
|
||||
| Display info / DPI config | ✓ | - | - | - | - |
|
||||
| Desktop recording | ✓ | - | - | - | ✓ |
|
||||
|
||||
## Live Streaming
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Live desktop stream | ✓ | - | ✓ | - | - |
|
||||
| Protocol | WebRTC (Neko) | - | VNC | - | CDP screencast |
|
||||
| Video codecs | VP8, VP9, H.264 | - | - | - | JPEG |
|
||||
| Audio streaming | ✓ (Opus, G.722) | - | - | - | - |
|
||||
| Interactive input via stream | ✓ | - | ✓ | - | Limited |
|
||||
| Configurable FPS (1-60) | ✓ | - | - | - | - |
|
||||
| Multi-viewer | ✓ | - | - | - | ✓ |
|
||||
| Typical latency | 50-150ms | - | 100-500ms | - | 200-1000ms |
|
||||
|
||||
## Browser Automation
|
||||
|
||||
| Feature | Sandbox Agent | Cloudflare | Browserbase | Steel | Stagehand |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Start/stop browser | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| CDP WebSocket access | ✓ | ✓ | ✓ | ✓ | - |
|
||||
| Navigate / back / forward | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| Tab management | ✓ | - | ✓ | - | - |
|
||||
| Click / type / scroll (selector) | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| Screenshot (browser-level) | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| PDF generation | ✓ | ✓ | ✓ | ✓ | - |
|
||||
| Get page HTML | ✓ | ✓ | ✓ | - | - |
|
||||
| Get page as Markdown | ✓ | ✓ | - | - | - |
|
||||
| Scrape elements (selectors) | ✓ | ✓ | ✓ | - | - |
|
||||
| Extract all links | ✓ | ✓ | - | - | - |
|
||||
| Accessibility tree snapshot | ✓ | - | - | - | ✓ |
|
||||
| Execute JavaScript | ✓ | - | ✓ | ✓ | - |
|
||||
| Console log capture | ✓ | - | ✓ | - | - |
|
||||
| Network request capture | ✓ | - | ✓ | - | - |
|
||||
| Web crawling | ✓ | ✓ | - | - | - |
|
||||
| Persistent browser profiles | ✓ | - | ✓ | ✓ | - |
|
||||
| Cookie management | ✓ | - | ✓ | ✓ | - |
|
||||
| File upload to input | ✓ | - | ✓ | ✓ | - |
|
||||
| Dialog handling | ✓ | - | ✓ | - | - |
|
||||
| Live browser streaming | ✓ (WebRTC) | - | ✓ (CDP) | ✓ | - |
|
||||
| Anti-detection/stealth | - | - | ✓ | ✓ | - |
|
||||
| Proxy support | - | - | ✓ | ✓ | - |
|
||||
| CAPTCHA solving | - | - | ✓ | - | - |
|
||||
| Browser extensions | - | - | ✓ | - | - |
|
||||
|
||||
## Agent Integration
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| Agent Client Protocol (ACP) | ✓ | - | - | - | - |
|
||||
| MCP server config | ✓ | - | - | - | - |
|
||||
| Skills config | ✓ | - | - | - | - |
|
||||
| Agent install/management | ✓ | - | - | - | - |
|
||||
| Session persistence | ✓ | - | - | - | - |
|
||||
| Permission system | ✓ | - | - | - | - |
|
||||
| Code interpreter | - | - | ✓ | - | - |
|
||||
|
||||
## SDKs and Tooling
|
||||
|
||||
| Feature | Sandbox Agent | Daytona | E2B | Cloudflare | Browserbase |
|
||||
|---------|:---:|:---:|:---:|:---:|:---:|
|
||||
| TypeScript SDK | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| Python SDK | - | ✓ | ✓ | - | ✓ |
|
||||
| React components | ✓ | - | - | - | - |
|
||||
| Inspector UI | ✓ | - | - | - | - |
|
||||
| Provider abstraction (7+) | ✓ | - | - | - | - |
|
||||
| WebRTC client library | ✓ | - | - | - | - |
|
||||
| CLI | ✓ | ✓ | ✓ | ✓ | - |
|
||||
|
||||
## Streaming Technology Comparison
|
||||
|
||||
For platforms that support live desktop/browser streaming, here is how the underlying technologies compare:
|
||||
|
||||
| Dimension | WebRTC (Neko) | VNC (noVNC) | CDP Screencast | WebSocket + JPEG |
|
||||
|-----------|:---:|:---:|:---:|:---:|
|
||||
| Typical latency | 50-150ms | 100-500ms | 200-1000ms | 150-400ms |
|
||||
| Frame rate | 30-60 fps | 10-30 fps | 1-15 fps | 5-20 fps |
|
||||
| Video quality | High | Medium | Low-Medium | Medium |
|
||||
| Audio support | Yes | No | No | No |
|
||||
| Interactive input | Full | Full | Limited | Limited |
|
||||
| Bandwidth (adaptive) | Yes | No | No | No |
|
||||
| Used by | Sandbox Agent | E2B, Gitpod | Browserbase | Various |
|
||||
|
||||
Sandbox Agent uses [Neko](https://github.com/m1k1o/neko) (WebRTC) for streaming, which provides the lowest latency and best interactivity of the approaches compared above. The same stream serves both the full desktop and browser automation modes.
|
||||
|
||||
|
|
@ -103,6 +103,7 @@
|
|||
{
|
||||
"group": "More",
|
||||
"pages": [
|
||||
"browser-feature-matrix",
|
||||
"daemon",
|
||||
"cors",
|
||||
"session-restoration",
|
||||
|
|
|
|||
1442
notes/specs/browser-automation-spec.md
Normal file
1442
notes/specs/browser-automation-spec.md
Normal file
File diff suppressed because it is too large
Load diff
1
scripts/ralph/.last-branch
Normal file
1
scripts/ralph/.last-branch
Normal file
|
|
@ -0,0 +1 @@
|
|||
ralph/browser-automation
|
||||
104
scripts/ralph/CLAUDE.md
Normal file
104
scripts/ralph/CLAUDE.md
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# Ralph Agent Instructions
|
||||
|
||||
You are an autonomous coding agent working on a software project.
|
||||
|
||||
## Your Task
|
||||
|
||||
1. Read the PRD at `prd.json` (in the same directory as this file)
|
||||
2. Read the progress log at `progress.txt` (check Codebase Patterns section first)
|
||||
3. Check that you're on the branch named in the PRD's `branchName` field. If not, check it out, or create it from `main` if it doesn't exist yet.
|
||||
4. Pick the **highest priority** user story where `passes: false`
|
||||
5. Implement that single user story
|
||||
6. Run quality checks (e.g., typecheck, lint, test - use whatever your project requires)
|
||||
7. Update CLAUDE.md files if you discover reusable patterns (see below)
|
||||
8. If checks pass, commit ALL changes with message: `feat: [Story ID] - [Story Title]`
|
||||
9. Update the PRD to set `passes: true` for the completed story
|
||||
10. Append your progress to `progress.txt`
|
||||
|
||||
## Progress Report Format
|
||||
|
||||
APPEND to progress.txt (never replace, always append):
|
||||
```
|
||||
## [Date/Time] - [Story ID]
|
||||
- What was implemented
|
||||
- Files changed
|
||||
- **Learnings for future iterations:**
|
||||
- Patterns discovered (e.g., "this codebase uses X for Y")
|
||||
- Gotchas encountered (e.g., "don't forget to update Z when changing W")
|
||||
- Useful context (e.g., "the evaluation panel is in component X")
|
||||
---
|
||||
```
|
||||
|
||||
The learnings section is critical - it helps future iterations avoid repeating mistakes and understand the codebase better.
|
||||
|
||||
## Consolidate Patterns
|
||||
|
||||
If you discover a **reusable pattern** that future iterations should know, add it to the `## Codebase Patterns` section at the TOP of progress.txt (create it if it doesn't exist). This section should consolidate the most important learnings:
|
||||
|
||||
```
|
||||
## Codebase Patterns
|
||||
- Example: Use `sql<number>` template for aggregations
|
||||
- Example: Always use `IF NOT EXISTS` for migrations
|
||||
- Example: Export types from actions.ts for UI components
|
||||
```
|
||||
|
||||
Only add patterns that are **general and reusable**, not story-specific details.
|
||||
|
||||
## Update CLAUDE.md Files
|
||||
|
||||
Before committing, check if any edited files have learnings worth preserving in nearby CLAUDE.md files:
|
||||
|
||||
1. **Identify directories with edited files** - Look at which directories you modified
|
||||
2. **Check for existing CLAUDE.md** - Look for CLAUDE.md in those directories or parent directories
|
||||
3. **Add valuable learnings** - If you discovered something future developers/agents should know:
|
||||
- API patterns or conventions specific to that module
|
||||
- Gotchas or non-obvious requirements
|
||||
- Dependencies between files
|
||||
- Testing approaches for that area
|
||||
- Configuration or environment requirements
|
||||
|
||||
**Examples of good CLAUDE.md additions:**
|
||||
- "When modifying X, also update Y to keep them in sync"
|
||||
- "This module uses pattern Z for all API calls"
|
||||
- "Tests require the dev server running on PORT 3000"
|
||||
- "Field names must match the template exactly"
|
||||
|
||||
**Do NOT add:**
|
||||
- Story-specific implementation details
|
||||
- Temporary debugging notes
|
||||
- Information already in progress.txt
|
||||
|
||||
Only update CLAUDE.md if you have **genuinely reusable knowledge** that would help future work in that directory.
|
||||
|
||||
## Quality Requirements
|
||||
|
||||
- ALL commits must pass your project's quality checks (typecheck, lint, test)
|
||||
- Do NOT commit broken code
|
||||
- Keep changes focused and minimal
|
||||
- Follow existing code patterns
|
||||
|
||||
## Browser Testing (If Available)
|
||||
|
||||
For any story that changes UI, verify it works in the browser if you have browser testing tools configured (e.g., via MCP):
|
||||
|
||||
1. Navigate to the relevant page
|
||||
2. Verify the UI changes work as expected
|
||||
3. Take a screenshot if helpful for the progress log
|
||||
|
||||
If no browser tools are available, note in your progress report that manual browser verification is needed.
|
||||
|
||||
## Stop Condition
|
||||
|
||||
After completing a user story, check if ALL stories have `passes: true`.
|
||||
|
||||
If ALL stories are complete and passing, reply with:
|
||||
<promise>COMPLETE</promise>
|
||||
|
||||
If there are still stories with `passes: false`, end your response normally (another iteration will pick up the next story).
|
||||
|
||||
## Important
|
||||
|
||||
- Work on ONE story per iteration
|
||||
- Commit frequently
|
||||
- Keep CI green
|
||||
- Read the Codebase Patterns section in progress.txt before starting
|
||||
|
|
@ -278,8 +278,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 17,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "GET uses Network.getCookies with optional urls param. POST uses Network.setCookies with cookie array. DELETE uses Network.clearBrowserCookies (no filter) or Network.getCookies + Network.deleteCookies (with name/domain filter)."
|
||||
},
|
||||
{
|
||||
"id": "US-018",
|
||||
|
|
@ -295,8 +295,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 18,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "BFS crawl uses CDP Page.navigate + Runtime.evaluate for each page. Content extraction supports 4 modes (markdown/html/text/links). URL dedup via fragment-stripped normalization. Domain filtering via url crate. Added url.workspace = true dependency."
|
||||
},
|
||||
{
|
||||
"id": "US-019",
|
||||
|
|
@ -308,8 +308,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 19,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "Types added to existing types.ts (not a new types/browser.ts) following the SDK's established pattern of extracting type aliases from the generated OpenAPI types. Regenerated openapi.json and openapi.ts to include browser operations."
|
||||
},
|
||||
{
|
||||
"id": "US-020",
|
||||
|
|
@ -323,8 +323,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 20,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "Methods follow exact same patterns as desktop counterparts. getBrowserCdpUrl() uses toWebSocketUrl() + buildUrl() with access_token query param, same as buildDesktopStreamWebSocketUrl()."
|
||||
},
|
||||
{
|
||||
"id": "US-021",
|
||||
|
|
@ -343,8 +343,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 21,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "Methods follow same requestJson pattern as lifecycle methods. Type imports added alphabetically. closeBrowserTab uses DELETE method. createBrowserTab and browserReload have optional request params."
|
||||
},
|
||||
{
|
||||
"id": "US-022",
|
||||
|
|
@ -362,7 +362,7 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 22,
|
||||
"passes": false,
|
||||
"passes": true,
|
||||
"notes": ""
|
||||
},
|
||||
{
|
||||
|
|
@ -380,8 +380,8 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 23,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "All 7 interaction methods follow the same requestJson POST pattern with BrowserActionResponse return type. Type imports added alphabetically."
|
||||
},
|
||||
{
|
||||
"id": "US-024",
|
||||
|
|
@ -400,7 +400,7 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 24,
|
||||
"passes": false,
|
||||
"passes": true,
|
||||
"notes": ""
|
||||
},
|
||||
{
|
||||
|
|
@ -416,7 +416,7 @@
|
|||
"Typecheck passes"
|
||||
],
|
||||
"priority": 25,
|
||||
"passes": false,
|
||||
"passes": true,
|
||||
"notes": ""
|
||||
},
|
||||
{
|
||||
|
|
@ -433,8 +433,8 @@
|
|||
"Verify in browser using dev-browser skill"
|
||||
],
|
||||
"priority": 26,
|
||||
"passes": false,
|
||||
"notes": "Follow DesktopTab.tsx patterns for card layout and state management"
|
||||
"passes": true,
|
||||
"notes": "Follows DesktopTab.tsx patterns for card layout and state management. BrowserViewerClient used for live view with DesktopViewer component. Navigation bar with back/forward/reload + URL input. Context dropdown populated from getBrowserContexts(). Auto-refresh every 5s when active. BrowserStartRequest doesn't have 'streaming' field - removed it."
|
||||
},
|
||||
{
|
||||
"id": "US-027",
|
||||
|
|
@ -448,8 +448,8 @@
|
|||
"Verify in browser using dev-browser skill"
|
||||
],
|
||||
"priority": 27,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "Screenshot uses createScreenshotUrl blob pattern from DesktopTab. Tabs reuse desktop-window-item/desktop-window-focused CSS classes. Console auto-refreshes every 3s with level filter pills. All three sections conditionally rendered only when isActive."
|
||||
},
|
||||
{
|
||||
"id": "US-028",
|
||||
|
|
@ -465,8 +465,8 @@
|
|||
"Verify in browser using dev-browser skill"
|
||||
],
|
||||
"priority": 28,
|
||||
"passes": false,
|
||||
"notes": ""
|
||||
"passes": true,
|
||||
"notes": "Network section auto-refreshes every 3s with URL pattern filter. Content Tools has Get HTML/Markdown/Links/Snapshot buttons with output textarea. Recording reuses desktop recording API (startDesktopRecording/stopDesktopRecording/listDesktopRecordings). Contexts section with list/create/delete/Use button sets contextId for next browser start. Diagnostics shows lastError and process list from BrowserStatusResponse."
|
||||
},
|
||||
{
|
||||
"id": "US-029",
|
||||
|
|
@ -486,8 +486,128 @@
|
|||
"Tests pass"
|
||||
],
|
||||
"priority": 29,
|
||||
"passes": true,
|
||||
"notes": "Run with: cargo test -p sandbox-agent --test browser_api. Fixed CdpClient to connect to page endpoint instead of browser endpoint for Page/Runtime/DOM commands. Chromium added to test Docker image."
|
||||
},
|
||||
{
|
||||
"id": "US-030",
|
||||
"title": "Fix crawl page load: replace sleep with readyState polling",
|
||||
"description": "As a user, I need the crawl endpoint to reliably wait for pages to load instead of using a fixed 500ms sleep.",
|
||||
"acceptanceCriteria": [
|
||||
"In browser_crawl.rs, replace `tokio::time::sleep(Duration::from_millis(500))` after Page.navigate with a polling loop that checks `document.readyState === 'complete'` via Runtime.evaluate",
|
||||
"Polling should check every 100ms with a configurable timeout (default 10s)",
|
||||
"If timeout is reached, proceed with extraction anyway (don't fail the crawl)",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 30,
|
||||
"passes": true,
|
||||
"notes": "Current code at browser_crawl.rs:61 uses a hard-coded 500ms sleep which is too short for slow pages and wastes time on fast pages."
|
||||
},
|
||||
{
|
||||
"id": "US-031",
|
||||
"title": "Fix crawl navigation status: use real HTTP status instead of faked 200",
|
||||
"description": "As a user, I need crawl results to report the actual HTTP status code, not always 200.",
|
||||
"acceptanceCriteria": [
|
||||
"In browser_crawl.rs, enable Network domain (Network.enable) before crawling",
|
||||
"Capture the actual HTTP status from Network.responseReceived for each navigated page",
|
||||
"Replace the faked `nav_result.get(\"frameId\").map(|_| 200u16)` with the real status",
|
||||
"If Page.navigate returns an errorText field, record the page with status None and skip link extraction",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 31,
|
||||
"passes": true,
|
||||
"notes": "Enabled Network domain, subscribed to Network.responseReceived events, drain buffered events after readyState complete to get real HTTP status. Handles errorText from Page.navigate by recording None status and skipping extraction. Takes last Document response to handle redirect chains."
|
||||
},
|
||||
{
|
||||
"id": "US-032",
|
||||
"title": "Remove dead cdp_client() method from BrowserRuntime",
|
||||
"description": "As a developer, I want to remove dead code that always returns an error.",
|
||||
"acceptanceCriteria": [
|
||||
"Remove the `pub async fn cdp_client()` method from BrowserRuntime in browser_runtime.rs that always returns Err('Use with_cdp() to execute CDP commands')",
|
||||
"Verify no callers reference cdp_client() (only get_cdp() and with_cdp() should be used)",
|
||||
"If any callers exist, migrate them to get_cdp()",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 32,
|
||||
"passes": true,
|
||||
"notes": "browser_runtime.rs:553-564 always returns an error telling callers to use with_cdp(). This is dead code that confuses the API surface."
|
||||
},
|
||||
{
|
||||
"id": "US-033",
|
||||
"title": "Fix default display dimensions to match spec (1280x720)",
|
||||
"description": "As a developer, I need the default browser dimensions to match the spec.",
|
||||
"acceptanceCriteria": [
|
||||
"Change DEFAULT_WIDTH from 1440 to 1280 in browser_runtime.rs",
|
||||
"Change DEFAULT_HEIGHT from 900 to 720 in browser_runtime.rs",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 33,
|
||||
"passes": false,
|
||||
"notes": "Run with: cargo test -p sandbox-agent --test browser_api"
|
||||
"notes": "Spec section 3.1 says defaults are 1280x720 but browser_runtime.rs uses 1440x900."
|
||||
},
|
||||
{
|
||||
"id": "US-034",
|
||||
"title": "Add reverse mutual exclusivity check in DesktopRuntime",
|
||||
"description": "As a developer, I need DesktopRuntime to reject start when BrowserRuntime is active.",
|
||||
"acceptanceCriteria": [
|
||||
"In DesktopRuntime.start(), check if BrowserRuntime is active before proceeding",
|
||||
"If BrowserRuntime state is Active, return a 409 Conflict error with message explaining browser and desktop modes are mutually exclusive",
|
||||
"This mirrors the existing check in BrowserRuntime.start() that checks DesktopRuntime",
|
||||
"BrowserRuntime may need to be added to DesktopRuntime's constructor or accessed via shared app state",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 34,
|
||||
"passes": false,
|
||||
"notes": "Currently BrowserRuntime checks DesktopRuntime before starting, but DesktopRuntime does not check BrowserRuntime. This is a one-sided guard."
|
||||
},
|
||||
{
|
||||
"id": "US-035",
|
||||
"title": "Fix BrowserProblem misuse: use correct error variants for non-startup failures",
|
||||
"description": "As a developer, I need error variants to be used correctly so error codes are meaningful.",
|
||||
"acceptanceCriteria": [
|
||||
"In browser_context.rs, change delete_context's fs::remove_dir_all error from BrowserProblem::start_failed to a more appropriate variant (e.g. cdp_error or add a new internal_error variant)",
|
||||
"In browser_context.rs, change list_contexts's fs::read_dir error from BrowserProblem::start_failed similarly",
|
||||
"In browser_context.rs, change create_context's fs errors from BrowserProblem::start_failed similarly",
|
||||
"Fix the no-op comment at browser_runtime.rs console event handler: 'CDP uses warning as type but we normalize to warning' (same value, comment is misleading - either remove the comment or actually normalize 'warning' to 'warn')",
|
||||
"Typecheck passes"
|
||||
],
|
||||
"priority": 35,
|
||||
"passes": false,
|
||||
"notes": "BrowserProblem::start_failed (500 status, browser/start-failed code) is used as a catch-all for filesystem errors in browser_context.rs which makes error codes meaningless for API consumers."
|
||||
},
|
||||
{
|
||||
"id": "US-036",
|
||||
"title": "Add integration tests for console and network monitoring",
|
||||
"description": "As a developer, I need tests that verify console and network monitoring actually capture events.",
|
||||
"acceptanceCriteria": [
|
||||
"Add test v1_browser_console_monitoring to browser_api.rs",
|
||||
"Test navigates to a page that calls console.log('test-message') and console.error('test-error')",
|
||||
"Test calls GET /v1/browser/console and verifies the messages array contains entries with matching text and correct levels",
|
||||
"Test calls GET /v1/browser/console?level=error and verifies only error-level messages are returned",
|
||||
"Add test v1_browser_network_monitoring to browser_api.rs",
|
||||
"Test navigates to a page, then calls GET /v1/browser/network and verifies at least one request entry exists with a url, method, and status",
|
||||
"Tests pass"
|
||||
],
|
||||
"priority": 36,
|
||||
"passes": false,
|
||||
"notes": "Console and network monitoring have real complexity (background tasks, ring buffers, event correlation) but zero test coverage currently."
|
||||
},
|
||||
{
|
||||
"id": "US-037",
|
||||
"title": "Add integration tests for crawling",
|
||||
"description": "As a developer, I need tests that verify the crawl endpoint works with multiple pages.",
|
||||
"acceptanceCriteria": [
|
||||
"Add test v1_browser_crawl to browser_api.rs",
|
||||
"Write 3 test HTML pages: page-a.html links to page-b.html, page-b.html links to page-c.html, page-c.html has no links",
|
||||
"Test POST /v1/browser/crawl with url=file:///tmp/page-a.html, maxDepth=2, extract=text",
|
||||
"Verify response has 3 pages with correct depths (0, 1, 2)",
|
||||
"Verify totalPages is 3 and truncated is false",
|
||||
"Test maxPages=1 returns only 1 page and truncated is true",
|
||||
"Tests pass"
|
||||
],
|
||||
"priority": 37,
|
||||
"passes": false,
|
||||
"notes": "Crawl has real logic (BFS, domain filtering, depth limits, URL normalization) but no test coverage."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,10 @@
|
|||
- `BrowserRuntime::ensure_active()` is a reusable guard for any handler requiring active browser state
|
||||
- `BrowserRuntime::get_cdp()` returns `Arc<CdpClient>` without holding state lock; preferred over `with_cdp()` closure for handlers that do multiple async CDP calls (avoids lifetime issues)
|
||||
- `CdpClient::close()` takes `&self` (not `self`); CdpClient is stored as `Option<Arc<CdpClient>>` in BrowserRuntimeStateData
|
||||
- CdpClient MUST connect to a page endpoint (`/json/list` → first page's `webSocketDebuggerUrl`), NOT the browser endpoint from `/json/version`. Page/Runtime/DOM commands only work on page-level connections.
|
||||
- Integration tests use Docker containers via `TestApp::new(AuthConfig::disabled())` from `support/docker.rs`; `#[serial]` for sequential execution
|
||||
- Test helper `write_test_file()` uses `PUT /v1/fs/file?path=...` to write HTML test fixtures into the container
|
||||
- `docker/test-agent/Dockerfile` must include chromium + deps (libnss3, libatk-bridge2.0-0, libdrm2, libxcomposite1, libxdamage1, libxrandr2, libgbm1, libasound2, libpangocairo-1.0-0, libgtk-3-0) for browser integration tests
|
||||
- `get_page_info_via_cdp()` is a helper fn in router.rs for getting current URL and title via Runtime.evaluate
|
||||
- CDP `Page.getNavigationHistory` returns `{currentIndex, entries: [{id, url, title}]}` for back/forward navigation
|
||||
- CDP `Page.navigateToHistoryEntry` takes `{entryId}` (the id from history entries, not the index)
|
||||
|
|
@ -43,6 +47,21 @@
|
|||
- For internal-only fields in API types, use `#[serde(default, skip_serializing)]` to keep them out of JSON responses
|
||||
- Browser context management is pure filesystem CRUD; each context is a directory under `{state_dir}/browser-contexts/{id}/` with a `context.json` metadata file
|
||||
- Use hex-encoded /dev/urandom bytes for generating IDs (same pattern as telemetry.rs) to avoid adding new crate deps
|
||||
- CDP `Network.getCookies`/`setCookies`/`deleteCookies`/`clearBrowserCookies` for cookie CRUD; sameSite values are capitalized strings ("Strict", "Lax", "None")
|
||||
- For complex multi-page logic (e.g., crawl), put business logic in a separate module file and call it from the router handler; keeps router.rs manageable
|
||||
- `url` crate available as workspace dependency for URL parsing/domain extraction
|
||||
- TypeScript SDK types pipeline: (1) `cargo run -p sandbox-agent-openapi-gen -- --out docs/openapi.json`, (2) `npx openapi-typescript docs/openapi.json -o src/generated/openapi.ts && node ./scripts/patch-openapi-types.mjs`, (3) add type aliases in `types.ts` using `JsonResponse`/`JsonRequestBody`/`QueryParams` utilities, (4) export from `index.ts`
|
||||
- TypeScript SDK types are extracted from generated OpenAPI types, NOT manually written interfaces; operation IDs follow `{method}_v1_{domain}_{action}` pattern
|
||||
- For QueryParams types that might resolve to `never`, use defensive pattern: `QueryParams<T> extends never ? Record<string, never> : QueryParams<T>`
|
||||
- SDK method patterns: `requestJson("GET"|"POST", path)` for JSON, `requestRaw("GET", path, {query, accept})` for binary, `toWebSocketUrl(buildUrl(path, {access_token}))` for WS URLs; type imports go alphabetically in the `import { ... } from "./types.ts"` block
|
||||
- SDK binary response pattern (screenshot/pdf): `requestRaw("GET", path, {query, accept: "image/*"})` → `response.arrayBuffer()` → `new Uint8Array(buffer)`
|
||||
- SDK content extraction methods: `requestJson("GET", path, {query})` for JSON endpoints, `requestRaw("GET", path, {query, accept})` for binary; query param types use the defensive `extends never` pattern
|
||||
- React SDK components use inline CSSProperties styles (no CSS modules or Tailwind), with base shell/status/viewport styles as const objects
|
||||
- React SDK `BrowserViewerClient`/`DesktopViewerClient` use `Pick<SandboxAgent, ...>` for loose coupling; when adding new components that depend on SDK methods, the TypeScript SDK dist must be rebuilt first (`npx tsup` in sdks/typescript/) before React SDK typecheck passes
|
||||
- React SDK barrel exports are alphabetically ordered; component exports first, then type exports grouped by source file
|
||||
- Inspector debug tab pattern: (1) add to `DebugTab` union in DebugPanel.tsx, (2) import component, (3) add icon button in tabs section, (4) add conditional render `{debugTab === "x" && <XTab getClient={getClient} />}` in content section
|
||||
- Inspector tab components reuse `desktop-panel`, `desktop-state-grid`, `desktop-start-controls`, `desktop-input-group`, `card`, `card-header`, `card-meta`, `card-actions` CSS classes
|
||||
- `Parameters<SandboxAgent["methodName"]>[0]` derives request types from SDK method signatures in inspector components
|
||||
|
||||
# Ralph Progress Log
|
||||
Started: Tue Mar 17 04:32:06 AM PDT 2026
|
||||
|
|
@ -308,3 +327,283 @@ Started: Tue Mar 17 04:32:06 AM PDT 2026
|
|||
- Context types (BrowserContextInfo, BrowserContextListResponse, BrowserContextCreateRequest) were already defined in browser_types.rs from US-003
|
||||
- `tempfile` crate is a workspace dev-dependency available via `test-utils` feature flag
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-017
|
||||
- Implemented 3 browser cookie management HTTP endpoints: GET /v1/browser/cookies, POST /v1/browser/cookies, DELETE /v1/browser/cookies
|
||||
- GET /v1/browser/cookies: accepts optional `url` query param; uses CDP `Network.getCookies` with optional `urls` array; maps CDP cookie fields (httpOnly, sameSite) to BrowserCookie struct
|
||||
- POST /v1/browser/cookies: accepts `{cookies: [...]}` body; maps BrowserCookie fields to CDP format; uses CDP `Network.setCookies`
|
||||
- DELETE /v1/browser/cookies: accepts optional `name`, `domain` query params; if no filters, uses `Network.clearBrowserCookies`; if filtered, fetches all cookies via `Network.getCookies`, matches by name/domain, deletes each via `Network.deleteCookies`
|
||||
- Routes registered with combined `get().post().delete()` on single `/browser/cookies` path
|
||||
- OpenAPI paths and schemas registered for all 3 handlers and all cookie types (BrowserCookie, BrowserCookieSameSite, BrowserCookiesQuery, BrowserCookiesResponse, BrowserSetCookiesRequest, BrowserDeleteCookiesQuery)
|
||||
- Files changed: router.rs
|
||||
- **Learnings for future iterations:**
|
||||
- CDP `Network.getCookies` takes `{urls?: [string]}` and returns `{cookies: [{name, value, domain, path, expires, httpOnly, secure, sameSite, ...}]}`
|
||||
- CDP `Network.setCookies` takes `{cookies: [{name, value, domain?, path?, expires?, httpOnly?, secure?, sameSite?}]}`
|
||||
- CDP `Network.deleteCookies` takes `{name, domain?, path?}` to delete specific cookies
|
||||
- CDP `Network.clearBrowserCookies` takes no params and clears all cookies
|
||||
- CDP cookie `sameSite` values are "Strict", "Lax", "None" (capitalized strings)
|
||||
- CDP cookie `expires` is 0 for session cookies; filter with `> 0.0` before returning
|
||||
- For delete with filters, must first fetch all cookies then match and delete individually (CDP has no bulk-filter-delete)
|
||||
- Axum route `.get().post().delete()` chaining works for registering multiple HTTP methods on same path
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-018
|
||||
- Created `browser_crawl.rs` with BFS crawl implementation using CDP
|
||||
- POST /v1/browser/crawl: accepts `{url, maxPages?, maxDepth?, allowedDomains?, extract?}`
|
||||
- Returns `{pages: [{url, title, content, links, status, depth}], totalPages, truncated}`
|
||||
- 4 content extraction modes: markdown (strips nav/footer/aside, uses html2md), html (outerHTML), text (innerText), links (empty content, links in links field)
|
||||
- BFS queue with visited set for URL deduplication (fragment-stripped normalization)
|
||||
- Domain filtering via `url` crate; defaults to same-domain-only if no allowedDomains specified
|
||||
- maxPages default 10, capped at 100; maxDepth default 2
|
||||
- Added `url.workspace = true` dependency to sandbox-agent Cargo.toml
|
||||
- Route registered at `/browser/crawl` in v1_router, OpenAPI paths and schemas registered
|
||||
- Files changed: browser_crawl.rs (new), Cargo.toml, lib.rs, router.rs
|
||||
- **Learnings for future iterations:**
|
||||
- `url` crate (v2.5) is a workspace dependency, just add `url.workspace = true` to package Cargo.toml
|
||||
- `Url::parse()` + `host_str()` is the clean way to extract domains from URLs for filtering
|
||||
- Crawl logic is kept in a separate module (browser_crawl.rs) rather than inline in router.rs since it has substantial business logic
|
||||
- The crawl reuses the same CDP patterns: Page.navigate for navigation, Runtime.evaluate for content extraction, JSON.stringify for link collection
|
||||
- Fragment-stripped URL normalization (`Url::set_fragment(None)`) prevents crawling the same page with different anchors
|
||||
- `truncated` field signals whether there were more pages in the queue when max_pages was reached
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-019
|
||||
- Added 55 browser type aliases to `sdks/typescript/src/types.ts` following existing desktop type pattern
|
||||
- Regenerated `docs/openapi.json` from Rust server (now includes all browser endpoints)
|
||||
- Regenerated `sdks/typescript/src/generated/openapi.ts` via `openapi-typescript`
|
||||
- Exported all browser types from `sdks/typescript/src/index.ts` barrel file
|
||||
- Types cover: lifecycle (BrowserState, BrowserStartRequest, BrowserStatusResponse), navigation (BrowserNavigateRequest, BrowserPageInfo, BrowserWaitRequest/Response), tabs (BrowserTabInfo, BrowserTabListResponse, BrowserCreateTabRequest), screenshots/PDF (BrowserScreenshotQuery/Format, BrowserPdfQuery/Format), content extraction (BrowserContentQuery/Response, BrowserMarkdownResponse, BrowserLinksResponse, BrowserSnapshotResponse), scrape/execute (BrowserScrapeRequest/Response, BrowserExecuteRequest/Response), interaction (BrowserClickRequest, BrowserTypeRequest, BrowserSelectRequest, BrowserHoverRequest, BrowserScrollRequest, BrowserUploadRequest, BrowserDialogRequest, BrowserActionResponse), monitoring (BrowserConsoleQuery/Message/Response, BrowserNetworkQuery/Request/Response), crawl (BrowserCrawlRequest/Page/Response/Extract), contexts (BrowserContextInfo/ListResponse/CreateRequest), cookies (BrowserCookie/SameSite, BrowserCookiesQuery/Response, BrowserSetCookiesRequest, BrowserDeleteCookiesQuery)
|
||||
- Files changed: types.ts, index.ts, generated/openapi.ts, docs/openapi.json
|
||||
- **Learnings for future iterations:**
|
||||
- TypeScript SDK types are NOT manually written interfaces; they're type aliases extracted from generated OpenAPI types using `JsonResponse`, `JsonRequestBody`, `QueryParams` generic utilities
|
||||
- Must regenerate OpenAPI pipeline first: `cargo run -p sandbox-agent-openapi-gen -- --out docs/openapi.json` then `npx openapi-typescript ... -o src/generated/openapi.ts && node ./scripts/patch-openapi-types.mjs`
|
||||
- For query param types that might resolve to `never`, use the `extends never ? Record<string, never> : ...` pattern (see DesktopScreenshotQuery)
|
||||
- biome pre-commit hook auto-formats; files may be reformatted on commit
|
||||
- Operation IDs follow pattern: `{method}_v1_browser_{action}` (e.g., `post_v1_browser_start`, `get_v1_browser_status`)
|
||||
- Component schemas use the exact Rust struct name (e.g., `BrowserStartRequest`, `BrowserState`)
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-020
|
||||
- Added 4 browser lifecycle/CDP methods to SandboxAgent class in sdks/typescript/src/client.ts:
|
||||
- `startBrowser(request?)` → POST /v1/browser/start
|
||||
- `stopBrowser()` → POST /v1/browser/stop
|
||||
- `getBrowserStatus()` → GET /v1/browser/status
|
||||
- `getBrowserCdpUrl(options?)` → builds ws:// URL for /v1/browser/cdp with access_token
|
||||
- Imported `BrowserStartRequest` and `BrowserStatusResponse` types from types.ts
|
||||
- Methods placed after desktop stream methods, before private getLiveConnection
|
||||
- Files changed: client.ts
|
||||
- **Learnings for future iterations:**
|
||||
- SDK methods follow 1:1 pattern with desktop counterparts: `requestJson("METHOD", path, {body/query})` for JSON, `toWebSocketUrl(buildUrl(...))` for WS URLs
|
||||
- Type imports are added alphabetically in the main `import { ... } from "./types.ts"` block
|
||||
- `getBrowserCdpUrl()` is sync (not async) since it just constructs a URL, same as `buildDesktopStreamWebSocketUrl()`
|
||||
- Reuses `ProcessTerminalWebSocketUrlOptions` type for the options param (contains `accessToken?: string`)
|
||||
- biome pre-commit formats automatically; no manual formatting needed
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-021
|
||||
- Added 9 browser navigation and tab methods to SandboxAgent class in sdks/typescript/src/client.ts:
|
||||
- `browserNavigate(request)` → POST /v1/browser/navigate → BrowserPageInfo
|
||||
- `browserBack()` → POST /v1/browser/back → BrowserPageInfo
|
||||
- `browserForward()` → POST /v1/browser/forward → BrowserPageInfo
|
||||
- `browserReload(request?)` → POST /v1/browser/reload → BrowserPageInfo
|
||||
- `browserWait(request)` → POST /v1/browser/wait → BrowserWaitResponse
|
||||
- `getBrowserTabs()` → GET /v1/browser/tabs → BrowserTabListResponse
|
||||
- `createBrowserTab(request?)` → POST /v1/browser/tabs → BrowserTabInfo
|
||||
- `activateBrowserTab(tabId)` → POST /v1/browser/tabs/:id/activate → BrowserTabInfo
|
||||
- `closeBrowserTab(tabId)` → DELETE /v1/browser/tabs/:id → BrowserActionResponse
|
||||
- Added 9 type imports alphabetically: BrowserActionResponse, BrowserCreateTabRequest, BrowserNavigateRequest, BrowserPageInfo, BrowserReloadRequest, BrowserTabInfo, BrowserTabListResponse, BrowserWaitRequest, BrowserWaitResponse
|
||||
- Files changed: client.ts
|
||||
- **Learnings for future iterations:**
|
||||
- Navigation methods (back/forward/reload) have no required request body, but reload accepts optional BrowserReloadRequest
|
||||
- Tab methods use path params for tab IDs: `/browser/tabs/${tabId}/activate` and `/browser/tabs/${tabId}`
|
||||
- createBrowserTab request body is optional (defaults to empty tab)
|
||||
- closeBrowserTab returns BrowserActionResponse ({ok: true}), not BrowserTabInfo
|
||||
- DELETE HTTP method works with requestJson same as GET/POST
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-022
|
||||
- Added 8 browser content extraction methods to SandboxAgent class in sdks/typescript/src/client.ts:
|
||||
- `takeBrowserScreenshot(query?)` → GET /v1/browser/screenshot → Uint8Array (binary, requestRaw)
|
||||
- `getBrowserPdf(query?)` → GET /v1/browser/pdf → Uint8Array (binary, requestRaw with accept: "application/pdf")
|
||||
- `getBrowserContent(query?)` → GET /v1/browser/content → BrowserContentResponse
|
||||
- `getBrowserMarkdown()` → GET /v1/browser/markdown → BrowserMarkdownResponse
|
||||
- `scrapeBrowser(request)` → POST /v1/browser/scrape → BrowserScrapeResponse
|
||||
- `getBrowserLinks()` → GET /v1/browser/links → BrowserLinksResponse
|
||||
- `executeBrowserScript(request)` → POST /v1/browser/execute → BrowserExecuteResponse
|
||||
- `getBrowserSnapshot()` → GET /v1/browser/snapshot → BrowserSnapshotResponse
|
||||
- Added 11 type imports alphabetically: BrowserContentQuery, BrowserContentResponse, BrowserExecuteRequest, BrowserExecuteResponse, BrowserLinksResponse, BrowserMarkdownResponse, BrowserPdfQuery, BrowserScrapeRequest, BrowserScrapeResponse, BrowserScreenshotQuery, BrowserSnapshotResponse
|
||||
- Files changed: client.ts
|
||||
- **Learnings for future iterations:**
|
||||
- Screenshot uses `requestRaw` with `accept: "image/*"`, PDF uses `requestRaw` with `accept: "application/pdf"` - both return `Uint8Array` via `response.arrayBuffer()`
|
||||
- Content extraction GET endpoints with optional query params use `requestJson("GET", path, { query })` pattern
|
||||
- Scrape and execute are POST endpoints with required request bodies
|
||||
- getBrowserMarkdown, getBrowserLinks, getBrowserSnapshot have no parameters (simple GET endpoints)
|
||||
- Parameter name is `query` (not `request`) for GET endpoints with query params, matching desktop screenshot pattern
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-023
|
||||
- Added 7 browser interaction methods to SandboxAgent class in sdks/typescript/src/client.ts:
|
||||
- `browserClick(request)` → POST /v1/browser/click → BrowserActionResponse
|
||||
- `browserType(request)` → POST /v1/browser/type → BrowserActionResponse
|
||||
- `browserSelect(request)` → POST /v1/browser/select → BrowserActionResponse
|
||||
- `browserHover(request)` → POST /v1/browser/hover → BrowserActionResponse
|
||||
- `browserScroll(request)` → POST /v1/browser/scroll → BrowserActionResponse
|
||||
- `browserUpload(request)` → POST /v1/browser/upload → BrowserActionResponse
|
||||
- `browserDialog(request)` → POST /v1/browser/dialog → BrowserActionResponse
|
||||
- Added 7 type imports alphabetically: BrowserClickRequest, BrowserDialogRequest, BrowserHoverRequest, BrowserScrollRequest, BrowserSelectRequest, BrowserTypeRequest, BrowserUploadRequest
|
||||
- Files changed: client.ts
|
||||
- **Learnings for future iterations:**
|
||||
- All browser interaction methods follow the exact same pattern: `requestJson("POST", path, { body: request })` returning `BrowserActionResponse`
|
||||
- BrowserActionResponse is shared across all interaction endpoints (already imported from US-021)
|
||||
- Methods placed after content extraction methods and before private getLiveConnection
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-024
|
||||
- Added 9 browser monitoring, crawl, context, and cookie methods to SandboxAgent class in sdks/typescript/src/client.ts:
|
||||
- `getBrowserConsole(query?)` → GET /v1/browser/console → BrowserConsoleResponse
|
||||
- `getBrowserNetwork(query?)` → GET /v1/browser/network → BrowserNetworkResponse
|
||||
- `crawlBrowser(request)` → POST /v1/browser/crawl → BrowserCrawlResponse
|
||||
- `getBrowserContexts()` → GET /v1/browser/contexts → BrowserContextListResponse
|
||||
- `createBrowserContext(request)` → POST /v1/browser/contexts → BrowserContextInfo
|
||||
- `deleteBrowserContext(contextId)` → DELETE /v1/browser/contexts/:id → BrowserActionResponse
|
||||
- `getBrowserCookies(query?)` → GET /v1/browser/cookies → BrowserCookiesResponse
|
||||
- `setBrowserCookies(request)` → POST /v1/browser/cookies → BrowserActionResponse
|
||||
- `deleteBrowserCookies(query?)` → DELETE /v1/browser/cookies → BrowserActionResponse
|
||||
- Added 13 type imports alphabetically: BrowserConsoleQuery, BrowserConsoleResponse, BrowserContextCreateRequest, BrowserContextInfo, BrowserContextListResponse, BrowserCookiesQuery, BrowserCookiesResponse, BrowserCrawlRequest, BrowserCrawlResponse, BrowserDeleteCookiesQuery, BrowserNetworkQuery, BrowserNetworkResponse, BrowserSetCookiesRequest
|
||||
- Files changed: client.ts
|
||||
- **Learnings for future iterations:**
|
||||
- Monitoring endpoints (console/network) use GET with optional query params, same pattern as content extraction
|
||||
- Context CRUD: GET for list, POST for create (returns BrowserContextInfo, not BrowserContextListResponse), DELETE with path param for delete
|
||||
- Cookie methods mirror the Rust HTTP API exactly: GET/POST/DELETE on same /cookies path
|
||||
- deleteBrowserCookies uses query params (not body) for filter criteria, matching the Rust DELETE handler
|
||||
- createBrowserContext returns BrowserContextInfo (single context), not BrowserContextListResponse
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-025
|
||||
- Created `sdks/react/src/BrowserViewer.tsx` with BrowserViewer component that wraps DesktopViewer with a browser navigation bar
|
||||
- BrowserViewerClient type uses `Pick<SandboxAgent, "connectDesktopStream" | "browserNavigate" | "browserBack" | "browserForward" | "browserReload" | "getBrowserStatus">`
|
||||
- BrowserViewerProps: client, className, style, height (default 480), showNavigationBar (default true), showStatusBar (default true), onNavigate, onConnect, onDisconnect, onError
|
||||
- Navigation bar has back/forward/reload buttons and URL input with Enter-to-navigate
|
||||
- URL auto-prefixes https:// if no protocol specified
|
||||
- Syncs URL display from getBrowserStatus() on stream connect
|
||||
- Passes DesktopViewer props with shell styling overridden (no double border/shadow)
|
||||
- Exported BrowserViewer + BrowserViewerClient + BrowserViewerProps from index.ts
|
||||
- Files changed: BrowserViewer.tsx (new), index.ts
|
||||
- **Learnings for future iterations:**
|
||||
- React SDK references `sandbox-agent` via workspace symlink but uses compiled dist types; must rebuild TypeScript SDK (`npx tsup` in sdks/typescript/) after adding new methods before React typecheck works
|
||||
- biome pre-commit reformats: `Pick<>` union types get collapsed to single line, style objects stay as-is
|
||||
- DesktopViewer accepts style prop which can override its shell styling (border, borderRadius, background, boxShadow) - useful for embedding inside a wrapper component
|
||||
- BrowserViewer composes DesktopViewer rather than duplicating WebRTC logic; the stream is the same (Neko on Xvfb display)
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-026
|
||||
- Created `BrowserTab.tsx` in `frontend/packages/inspector/src/components/debug/` with two sections:
|
||||
- Section 1 - Runtime Control: state pill (active/inactive/install_required/failed), status grid (URL, Resolution, Started), config inputs (Width, Height, URL, Context dropdown), Start/Stop buttons, auto-refresh every 5s when active
|
||||
- Section 2 - Live View: navigation bar (Back, Forward, Reload + URL input), DesktopViewer component for WebRTC stream, current URL display
|
||||
- Updated `DebugPanel.tsx`: added `"browser"` to DebugTab type, imported BrowserTab, added Globe icon tab button after Desktop, added render condition
|
||||
- Typecheck passes
|
||||
- Files changed: BrowserTab.tsx (new), DebugPanel.tsx
|
||||
- **Learnings for future iterations:**
|
||||
- Inspector tab pattern: add to DebugTab union type, import component, add button with icon in tabs section, add conditional render in content section
|
||||
- `BrowserStartRequest` does NOT have a `streaming` field (unlike what might be expected); just omit it
|
||||
- `BrowserViewerClient` from `@sandbox-agent/react` uses `Pick<SandboxAgent, ...>` and requires `connectDesktopStream`, `browserNavigate`, `browserBack`, `browserForward`, `browserReload`, `getBrowserStatus`
|
||||
- Reuse `desktop-panel`, `desktop-state-grid`, `desktop-start-controls`, `desktop-input-group` CSS classes from DesktopTab for consistent layout
|
||||
- biome pre-commit hook reformats: ternary chains get collapsed, style objects adjusted
|
||||
- `Parameters<SandboxAgent["methodName"]>[0]` is the pattern for deriving request types from SDK method signatures
|
||||
- Browser contexts are loaded via `getBrowserContexts()` and shown in a dropdown; the contextId is passed to `startBrowser()`
|
||||
- Manual browser verification needed (no browser testing tools available in this environment)
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-027
|
||||
- Implemented Screenshot, Tabs, and Console sections in BrowserTab.tsx
|
||||
- Files changed:
|
||||
- frontend/packages/inspector/src/components/debug/BrowserTab.tsx
|
||||
- **What was implemented:**
|
||||
- Section 3 - Screenshot: format selector (PNG/JPEG/WebP), quality input (hidden for PNG), fullPage checkbox, CSS selector input, capture button with loading state, preview image with blob URL management
|
||||
- Section 4 - Tabs: list of open tabs with URL/title, active tab highlighted with green pill, per-tab Activate/Close buttons, New Tab button with URL input (Enter key support)
|
||||
- Section 5 - Console: level filter pills (All/Log/Warn/Error/Info), scrollable message list with level-colored dot indicators and timestamps, auto-refresh every 3s when active
|
||||
- **Learnings for future iterations:**
|
||||
- `createScreenshotUrl` helper converts Uint8Array to blob URL; must be paired with `revokeScreenshotUrl` for cleanup
|
||||
- `desktop-window-item` and `desktop-window-focused` CSS classes work well for any list item with active state highlighting (not just windows)
|
||||
- `desktop-screenshot-controls` and `desktop-screenshot-frame`/`desktop-screenshot-image` CSS classes are reusable across browser and desktop screenshot sections
|
||||
- Console auto-refresh at 3s interval is distinct from status auto-refresh at 5s; both use the same useEffect + setInterval pattern with cleanup
|
||||
- `getBrowserConsole({ level })` accepts a level filter param; passing empty object gets all levels
|
||||
- Tabs and console are loaded eagerly when browser becomes active via a `status?.state === "active"` useEffect dependency
|
||||
- Manual browser verification needed (no browser testing tools available in this environment)
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-028
|
||||
- Added 5 new sections to BrowserTab.tsx: Network, Content Tools, Recording, Contexts, Diagnostics
|
||||
- Files changed: frontend/packages/inspector/src/components/debug/BrowserTab.tsx
|
||||
- **What was implemented:**
|
||||
- Section 6 - Network: request list with method/URL/status/size/duration, URL pattern filter input, auto-refresh every 3s
|
||||
- Section 7 - Content Tools: Get HTML, Get Markdown, Get Links, Get Snapshot buttons with readonly output textarea
|
||||
- Section 8 - Recording: reuses desktop recording API (startDesktopRecording/stopDesktopRecording/listDesktopRecordings/downloadDesktopRecording/deleteDesktopRecording), FPS input, start/stop buttons, recording list with download/delete, poll while recording active
|
||||
- Section 9 - Contexts: list browser contexts with name/id/size/date, create form, delete button, Use button to set contextId, refresh button
|
||||
- Section 10 - Diagnostics: lastError details (code + message), process list with name/pid/running state/logPath
|
||||
- **Learnings for future iterations:**
|
||||
- Recording is a shared desktop-level feature (Xvfb recording), not browser-specific; browser and desktop tabs share the same recording API
|
||||
- `downloadDesktopRecording` returns `Uint8Array` which needs the same `new Uint8Array(bytes.byteLength); payload.set(bytes)` workaround for Blob creation (TypeScript ArrayBufferLike vs ArrayBuffer type mismatch)
|
||||
- Network requests use `BrowserNetworkRequest` type with `responseSize` and `duration` fields (both nullable)
|
||||
- Content tools reuse existing SDK methods: getBrowserContent, getBrowserMarkdown, getBrowserLinks, getBrowserSnapshot
|
||||
- Context management is available even when browser is not active (filesystem-based), so the Contexts section is always shown
|
||||
- Diagnostics section conditionally renders only when there's data (lastError or processes)
|
||||
- Manual browser verification needed (no browser testing tools available in this environment)
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-029
|
||||
- Implemented browser API integration tests
|
||||
- Files changed:
|
||||
- `docker/test-agent/Dockerfile` - Added chromium and browser dependency packages
|
||||
- `server/packages/sandbox-agent/tests/browser_api.rs` - New integration test file with 7 test functions
|
||||
- `server/packages/sandbox-agent/src/browser_cdp.rs` - Fixed CdpClient to connect to page endpoint instead of browser endpoint
|
||||
- Test coverage:
|
||||
- `v1_browser_status_reports_install_required_when_chromium_missing` - Missing deps detection
|
||||
- `v1_browser_lifecycle_and_navigation` - Start, status, navigate, back, forward, reload, stop
|
||||
- `v1_browser_tabs_management` - List, create, activate, close tabs
|
||||
- `v1_browser_screenshots` - PNG, JPEG, WebP screenshot capture
|
||||
- `v1_browser_content_extraction` - HTML, markdown, links, accessibility snapshot
|
||||
- `v1_browser_interaction` - Click button, type text, verify state via execute
|
||||
- `v1_browser_contexts_management` - Create, list, delete persistent browser profiles
|
||||
- **Learnings for future iterations:**
|
||||
- CdpClient must connect to a page-level endpoint (`/json/list` → first page), not the browser-level endpoint (`/json/version`). Browser endpoints only support Target/Browser domains; Page/Runtime/DOM commands need page sessions.
|
||||
- The CDP proxy endpoint (`/v1/browser/cdp`) correctly uses the browser-level URL since external tools (Playwright/Puppeteer) handle session management themselves.
|
||||
- Test files can be written into the container via `PUT /v1/fs/file?path=...` and then navigated to via `file:///` URLs.
|
||||
- Docker image rebuild is triggered by `OnceLock` in the test harness; changing the Dockerfile or server binary invalidates the cached image tag.
|
||||
- `reqwest::Client.query(&[("path", path)])` properly URL-encodes query parameters (no need for `urlencoding` crate).
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-030
|
||||
- Replaced fixed 500ms `tokio::time::sleep` in `browser_crawl.rs` with a `document.readyState` polling loop
|
||||
- Polls every 100ms via `Runtime.evaluate`, times out after 10s, proceeds with extraction on timeout
|
||||
- Files changed: `server/packages/sandbox-agent/src/browser_crawl.rs`
|
||||
- **Learnings for future iterations:**
|
||||
- CDP `Runtime.evaluate` with `document.readyState` is reliable for detecting page load completion
|
||||
- Using `std::time::Instant` for timeout tracking avoids drift issues compared to counting iterations
|
||||
- Graceful timeout (proceed anyway) is better than failing the crawl when a page is slow
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-031
|
||||
- Replaced faked `200` status with real HTTP status from `Network.responseReceived` CDP events
|
||||
- Enabled `Network.enable` domain before crawl loop
|
||||
- Subscribe to `Network.responseReceived` once, drain buffered events after readyState polling
|
||||
- Added `drain_navigation_status()` helper that takes last Document response for a frame (handles redirects)
|
||||
- Added `errorText` check on `Page.navigate` result: if navigation fails, record page with `None` status and skip extraction
|
||||
- Files changed: `server/packages/sandbox-agent/src/browser_crawl.rs`
|
||||
- **Learnings for future iterations:**
|
||||
- `Network.responseReceived` events have `type` field; use `"Document"` to filter for the main navigation response
|
||||
- For redirect chains, the last Document `Network.responseReceived` event has the final status code
|
||||
- `Page.navigate` returns `errorText` (non-empty string) when navigation fails (DNS error, connection refused, etc.)
|
||||
- `mpsc::UnboundedReceiver::try_recv()` is useful for non-blocking drain of buffered events
|
||||
- `file://` URLs don't produce Network events, so status will be `None` - this is correct behavior
|
||||
---
|
||||
|
||||
## 2026-03-17 - US-032
|
||||
- Removed dead `pub async fn cdp_client()` method from BrowserRuntime (browser_runtime.rs:552-564)
|
||||
- Method always returned `Err(BrowserProblem::cdp_error("Use with_cdp() to execute CDP commands"))` - no callers existed
|
||||
- Grep confirmed zero references to `cdp_client()` method; only the `cdp_client` field on BrowserRuntimeState is used
|
||||
- Files changed: `server/packages/sandbox-agent/src/browser_runtime.rs`
|
||||
- **Learnings for future iterations:**
|
||||
- When removing methods, grep for the method name across the entire src directory to confirm no callers
|
||||
- The `cdp_client` field on BrowserRuntimeState and the `cdp_client()` method on BrowserRuntime are different things - field is actively used
|
||||
---
|
||||
|
|
|
|||
135
scripts/ralph/ralph.sh
Executable file
135
scripts/ralph/ralph.sh
Executable file
|
|
@ -0,0 +1,135 @@
|
|||
#!/bin/bash
|
||||
# Ralph Wiggum - Long-running AI agent loop
|
||||
# Usage: ./ralph.sh [--tool amp|claude] [max_iterations]
|
||||
|
||||
set -e
|
||||
|
||||
# Parse arguments
|
||||
TOOL="amp" # Default to amp for backwards compatibility
|
||||
MAX_ITERATIONS=10
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--tool)
|
||||
TOOL="$2"
|
||||
shift 2
|
||||
;;
|
||||
--tool=*)
|
||||
TOOL="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
# Assume it's max_iterations if it's a number
|
||||
if [[ "$1" =~ ^[0-9]+$ ]]; then
|
||||
MAX_ITERATIONS="$1"
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Validate tool choice
|
||||
if [[ "$TOOL" != "amp" && "$TOOL" != "claude" ]]; then
|
||||
echo "Error: Invalid tool '$TOOL'. Must be 'amp' or 'claude'."
|
||||
exit 1
|
||||
fi
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PRD_FILE="$SCRIPT_DIR/prd.json"
|
||||
PROGRESS_FILE="$SCRIPT_DIR/progress.txt"
|
||||
ARCHIVE_DIR="$SCRIPT_DIR/archive"
|
||||
LAST_BRANCH_FILE="$SCRIPT_DIR/.last-branch"
|
||||
|
||||
# Archive previous run if branch changed
|
||||
if [ -f "$PRD_FILE" ] && [ -f "$LAST_BRANCH_FILE" ]; then
|
||||
CURRENT_BRANCH=$(jq -r '.branchName // empty' "$PRD_FILE" 2>/dev/null || echo "")
|
||||
LAST_BRANCH=$(cat "$LAST_BRANCH_FILE" 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "$CURRENT_BRANCH" ] && [ -n "$LAST_BRANCH" ] && [ "$CURRENT_BRANCH" != "$LAST_BRANCH" ]; then
|
||||
# Archive the previous run
|
||||
DATE=$(date +%Y-%m-%d)
|
||||
# Strip "ralph/" prefix from branch name for folder
|
||||
FOLDER_NAME=$(echo "$LAST_BRANCH" | sed 's|^ralph/||')
|
||||
ARCHIVE_FOLDER="$ARCHIVE_DIR/$DATE-$FOLDER_NAME"
|
||||
|
||||
echo "Archiving previous run: $LAST_BRANCH"
|
||||
mkdir -p "$ARCHIVE_FOLDER"
|
||||
[ -f "$PRD_FILE" ] && cp "$PRD_FILE" "$ARCHIVE_FOLDER/"
|
||||
[ -f "$PROGRESS_FILE" ] && cp "$PROGRESS_FILE" "$ARCHIVE_FOLDER/"
|
||||
echo " Archived to: $ARCHIVE_FOLDER"
|
||||
|
||||
# Reset progress file for new run
|
||||
echo "# Ralph Progress Log" > "$PROGRESS_FILE"
|
||||
echo "Started: $(date)" >> "$PROGRESS_FILE"
|
||||
echo "---" >> "$PROGRESS_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Track current branch
|
||||
if [ -f "$PRD_FILE" ]; then
|
||||
CURRENT_BRANCH=$(jq -r '.branchName // empty' "$PRD_FILE" 2>/dev/null || echo "")
|
||||
if [ -n "$CURRENT_BRANCH" ]; then
|
||||
echo "$CURRENT_BRANCH" > "$LAST_BRANCH_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Initialize progress file if it doesn't exist
|
||||
if [ ! -f "$PROGRESS_FILE" ]; then
|
||||
echo "# Ralph Progress Log" > "$PROGRESS_FILE"
|
||||
echo "Started: $(date)" >> "$PROGRESS_FILE"
|
||||
echo "---" >> "$PROGRESS_FILE"
|
||||
fi
|
||||
|
||||
RUN_START=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
echo "Starting Ralph - Tool: $TOOL - Max iterations: $MAX_ITERATIONS"
|
||||
echo "Run started: $RUN_START"
|
||||
|
||||
for i in $(seq 1 $MAX_ITERATIONS); do
|
||||
ITER_START=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
echo ""
|
||||
echo "==============================================================="
|
||||
echo " Ralph Iteration $i of $MAX_ITERATIONS ($TOOL)"
|
||||
echo " Started: $ITER_START"
|
||||
echo "==============================================================="
|
||||
|
||||
# Run the selected tool with the ralph prompt
|
||||
if [[ "$TOOL" == "amp" ]]; then
|
||||
OUTPUT=$(cat "$SCRIPT_DIR/prompt.md" | amp --dangerously-allow-all 2>&1 | tee /dev/stderr) || true
|
||||
else
|
||||
# Claude Code: use --dangerously-skip-permissions for autonomous operation, --print for output
|
||||
OUTPUT=$(claude --dangerously-skip-permissions --print < "$SCRIPT_DIR/CLAUDE.md" 2>&1 | tee /dev/stderr) || true
|
||||
fi
|
||||
|
||||
ITER_END=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
ITER_DURATION=$(($(date -d "$ITER_END" +%s) - $(date -d "$ITER_START" +%s)))
|
||||
ITER_MINS=$((ITER_DURATION / 60))
|
||||
ITER_SECS=$((ITER_DURATION % 60))
|
||||
|
||||
# Check for completion signal
|
||||
if echo "$OUTPUT" | grep -q "<promise>COMPLETE</promise>"; then
|
||||
RUN_END=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
RUN_DURATION=$(($(date -d "$RUN_END" +%s) - $(date -d "$RUN_START" +%s)))
|
||||
RUN_MINS=$((RUN_DURATION / 60))
|
||||
RUN_SECS=$((RUN_DURATION % 60))
|
||||
echo ""
|
||||
echo "Ralph completed all tasks!"
|
||||
echo "Completed at iteration $i of $MAX_ITERATIONS"
|
||||
echo "Iteration: ${ITER_MINS}m ${ITER_SECS}s"
|
||||
echo "Run started: $RUN_START"
|
||||
echo "Run finished: $RUN_END (total: ${RUN_MINS}m ${RUN_SECS}s)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Iteration $i complete. Finished: $ITER_END (${ITER_MINS}m ${ITER_SECS}s)"
|
||||
sleep 2
|
||||
done
|
||||
|
||||
RUN_END=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
RUN_DURATION=$(($(date -d "$RUN_END" +%s) - $(date -d "$RUN_START" +%s)))
|
||||
RUN_MINS=$((RUN_DURATION / 60))
|
||||
RUN_SECS=$((RUN_DURATION % 60))
|
||||
echo ""
|
||||
echo "Ralph reached max iterations ($MAX_ITERATIONS) without completing all tasks."
|
||||
echo "Run started: $RUN_START"
|
||||
echo "Run finished: $RUN_END (total: ${RUN_MINS}m ${RUN_SECS}s)"
|
||||
echo "Check $PROGRESS_FILE for status."
|
||||
exit 1
|
||||
|
|
@ -24,8 +24,8 @@ use crate::process_runtime::{
|
|||
ProcessOwner, ProcessRuntime, ProcessStartSpec, ProcessStatus, RestartPolicy,
|
||||
};
|
||||
|
||||
const DEFAULT_WIDTH: u32 = 1440;
|
||||
const DEFAULT_HEIGHT: u32 = 900;
|
||||
const DEFAULT_WIDTH: u32 = 1280;
|
||||
const DEFAULT_HEIGHT: u32 = 720;
|
||||
const DEFAULT_DPI: u32 = 96;
|
||||
const DEFAULT_DISPLAY_NUM: i32 = 98;
|
||||
const MAX_DISPLAY_PROBE: i32 = 10;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue