Integrate OpenHandoff factory workspace (#212)

2026-04-21 21:04:04 +00:00 · 2026-03-09 14:00:20 -07:00 · 2026-03-09 14:00:20 -07:00 · bf282199b5
commit bf282199b5
parent 3d9476ed0b
251 changed files with 42824 additions and 692 deletions
--- a/.agents/skills/agent-browser/templates/capture-workflow.sh
+++ b/.agents/skills/agent-browser/templates/capture-workflow.sh
@@ -1,68 +1,69 @@
 #!/bin/bash
 # Template: Content Capture Workflow
-# Extract content from web pages with optional authentication
+# Purpose: Extract content from a web page (screenshot, element structure, text, PDF)
+# Usage: ./capture-workflow.sh <url> [output-dir]
+#
+# Outputs:
+#   - page-full.png: Full page screenshot
+#   - page-structure.txt: Page element structure with refs
+#   - page-text.txt: All text content
+#   - page.pdf: PDF version
+#
+# Optional: To capture protected pages, uncomment the auth-state block below

 set -euo pipefail

 TARGET_URL="${1:?Usage: $0 <url> [output-dir]}"
 OUTPUT_DIR="${2:-.}"

-echo "Capturing content from: $TARGET_URL"
+echo "Capturing: $TARGET_URL"
 mkdir -p "$OUTPUT_DIR"

-# Optional: Load authentication state if needed
+# Optional: Load authentication state
 # if [[ -f "./auth-state.json" ]]; then
+#     echo "Loading authentication state..."
 #     agent-browser state load "./auth-state.json"
 # fi

-# Navigate to target page
+# Navigate to target
 agent-browser open "$TARGET_URL"
 agent-browser wait --load networkidle

-# Get page metadata
-echo "Page title: $(agent-browser get title)"
-echo "Page URL: $(agent-browser get url)"
+# Get metadata
+TITLE=$(agent-browser get title)
+URL=$(agent-browser get url)
+echo "Title: $TITLE"
+echo "URL: $URL"

 # Capture full page screenshot
 agent-browser screenshot --full "$OUTPUT_DIR/page-full.png"
-echo "Screenshot saved: $OUTPUT_DIR/page-full.png"
+echo "Saved: $OUTPUT_DIR/page-full.png"

-# Get page structure
+# Get page structure with refs
 agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt"
-echo "Structure saved: $OUTPUT_DIR/page-structure.txt"
+echo "Saved: $OUTPUT_DIR/page-structure.txt"

-# Extract main content
-# Adjust selector based on target site structure
-# agent-browser get text @e1 > "$OUTPUT_DIR/main-content.txt"
-
-# Extract specific elements (uncomment as needed)
-# agent-browser get text "article" > "$OUTPUT_DIR/article.txt"
-# agent-browser get text "main" > "$OUTPUT_DIR/main.txt"
-# agent-browser get text ".content" > "$OUTPUT_DIR/content.txt"
-
-# Get full page text
+# Extract all text content
 agent-browser get text body > "$OUTPUT_DIR/page-text.txt"
-echo "Text content saved: $OUTPUT_DIR/page-text.txt"
+echo "Saved: $OUTPUT_DIR/page-text.txt"

-# Optional: Save as PDF
+# Save as PDF
 agent-browser pdf "$OUTPUT_DIR/page.pdf"
-echo "PDF saved: $OUTPUT_DIR/page.pdf"
+echo "Saved: $OUTPUT_DIR/page.pdf"

-# Optional: Capture with scrolling for infinite scroll pages
-# scroll_and_capture() {
-#     local count=0
-#     while [[ $count -lt 5 ]]; do
-#         agent-browser scroll down 1000
-#         agent-browser wait 1000
-#         ((count++))
-#     done
-#     agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png"
-# }
-# scroll_and_capture
+# Optional: Extract specific elements using the refs listed in page-structure.txt
+# agent-browser get text @e5 > "$OUTPUT_DIR/main-content.txt"
+
+# Optional: Handle infinite scroll pages
+# for i in {1..5}; do
+#     agent-browser scroll down 1000
+#     agent-browser wait 1000
+# done
+# agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png"

 # Cleanup
 agent-browser close

 echo ""
-echo "Capture complete! Files saved to: $OUTPUT_DIR"
+echo "Capture complete:"
 ls -la "$OUTPUT_DIR"