fix: supervise guest init and desktop services

This commit is contained in:
Harivansh Rathi 2026-04-08 14:02:14 -04:00
parent 1e55cb4e81
commit 09c9671fcb
2 changed files with 163 additions and 21 deletions

View file

@@ -1,23 +1,114 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -uo pipefail
export DISPLAY=:0 export DISPLAY=:0
Xvfb "$DISPLAY" -screen 0 1280x800x24 >/tmp/xvfb.log 2>&1 & log() {
XVFB_PID=$! printf '[microagent-desktop] %s\n' "$*" >&2
}
for _ in $(seq 1 50); do pid_running() {
local pid="${1:-}"
[ -n "$pid" ] && kill -0 "$pid" >/dev/null 2>&1
}
# Wait on a background child and throw away its exit status.
# Arguments: $1 - PID to wait on (optional; an empty/missing value is a no-op)
# Returns:   always 0
reap_if_needed() {
  local child="${1:-}"
  [ -n "$child" ] || return 0
  # NOTE: `wait` blocks while the PID is still a running child of this shell;
  # any error from `wait` (unknown PID, non-zero child status) is ignored.
  wait "$child" >/dev/null 2>&1 || true
}
# Stop every desktop service whose PID has been recorded, then exit.
# Globals:   websockify_pid, x11vnc_pid, xterm_pid, openbox_pid, xvfb_pid
#            (read; any of them may be unset or empty)
# Returns:   does not return; exits the shell with status 0
cleanup() {
  local target
  # Restore default INT/TERM handling before tearing things down.
  trap - INT TERM
  # Signal order is preserved: websockify, x11vnc, xterm, openbox, Xvfb.
  for target in \
    "${websockify_pid:-}" \
    "${x11vnc_pid:-}" \
    "${xterm_pid:-}" \
    "${openbox_pid:-}" \
    "${xvfb_pid:-}"; do
    [ -n "$target" ] || continue
    # Best effort: a process that is already gone is not an error.
    kill "$target" >/dev/null 2>&1 || true
  done
  # Collect whatever children remain before leaving.
  wait >/dev/null 2>&1 || true
  exit 0
}
# Launch (or relaunch) openbox as user "node" on $DISPLAY in the background.
# Globals:   DISPLAY (read), openbox_pid (read, then overwritten)
# Outputs:   appends the process's stdout/stderr to /tmp/openbox.log
start_openbox() {
  local -a launch
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${openbox_pid:-}"
  log "starting openbox"
  launch=(runuser -u node -- env "DISPLAY=$DISPLAY" openbox)
  "${launch[@]}" >>/tmp/openbox.log 2>&1 &
  # $! is the PID of the backgrounded runuser command.
  openbox_pid=$!
}
# Launch (or relaunch) an xterm as user "node" on $DISPLAY in the background.
# Globals:   DISPLAY (read), xterm_pid (read, then overwritten)
# Outputs:   appends the process's stdout/stderr to /tmp/xterm.log
start_xterm() {
  local -a launch
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${xterm_pid:-}"
  log "starting xterm"
  launch=(runuser -u node -- env "DISPLAY=$DISPLAY" xterm -fa Monospace -fs 12)
  "${launch[@]}" >>/tmp/xterm.log 2>&1 &
  # $! is the PID of the backgrounded runuser command.
  xterm_pid=$!
}
# Launch (or relaunch) x11vnc against $DISPLAY on RFB port 5900 in the
# background.
# Globals:   DISPLAY (read), x11vnc_pid (read, then overwritten)
# Outputs:   appends the process's stdout/stderr to /tmp/x11vnc.log
start_x11vnc() {
  local -a launch
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${x11vnc_pid:-}"
  log "starting x11vnc"
  launch=(x11vnc -display "$DISPLAY" -rfbport 5900 -forever -shared -nopw)
  "${launch[@]}" >>/tmp/x11vnc.log 2>&1 &
  x11vnc_pid=$!
}
# Launch (or relaunch) websockify in the background, listening on 6080 and
# targeting localhost:5900, with /usr/share/novnc as its web root.
# Globals:   websockify_pid (read, then overwritten)
# Outputs:   appends the process's stdout/stderr to /tmp/websockify.log
start_websockify() {
  local -a launch
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${websockify_pid:-}"
  log "starting websockify on 6080"
  launch=(websockify --web=/usr/share/novnc 6080 localhost:5900)
  "${launch[@]}" >>/tmp/websockify.log 2>&1 &
  websockify_pid=$!
}
trap cleanup INT TERM
log "starting Xvfb"
Xvfb "$DISPLAY" -screen 0 1280x800x24 >/tmp/xvfb.log 2>&1 &
xvfb_pid=$!
ready=0
for _ in $(seq 1 100); do
if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then
ready=1
break break
fi fi
if ! pid_running "$xvfb_pid"; then
log "Xvfb exited before display became ready"
wait "$xvfb_pid" >/dev/null 2>&1 || true
exit 1
fi
sleep 0.1 sleep 0.1
done done
runuser -u node -- env DISPLAY="$DISPLAY" openbox >/tmp/openbox.log 2>&1 & if [ "$ready" -ne 1 ]; then
runuser -u node -- env DISPLAY="$DISPLAY" xterm -fa Monospace -fs 12 >/tmp/xterm.log 2>&1 & log "Xvfb did not become ready in time"
exit 1
fi
x11vnc -display "$DISPLAY" -rfbport 5900 -forever -shared -nopw >/tmp/x11vnc.log 2>&1 & start_openbox
websockify --web=/usr/share/novnc 6080 localhost:5900 >/tmp/websockify.log 2>&1 & start_xterm
start_x11vnc
start_websockify
trap 'kill $XVFB_PID || true; kill 0 || true; exit 0' INT TERM while true; do
wait -n if ! pid_running "$xvfb_pid"; then
log "Xvfb exited; stopping desktop session"
wait "$xvfb_pid" >/dev/null 2>&1 || true
exit 1
fi
if ! pid_running "${openbox_pid:-}"; then
log "openbox exited; restarting"
start_openbox
fi
if ! pid_running "${xterm_pid:-}"; then
log "xterm exited; restarting"
start_xterm
fi
if ! pid_running "${x11vnc_pid:-}"; then
log "x11vnc exited; restarting"
start_x11vnc
fi
if ! pid_running "${websockify_pid:-}"; then
log "websockify exited; restarting"
start_websockify
fi
sleep 1
done

View file

@ -1,5 +1,11 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -uo pipefail
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Write one tagged diagnostic line to stderr.
# Arguments: $@ - message words; joined into a single string via "$*"
# Outputs:   "[microagent-init] <message>" followed by a newline, on stderr
log() {
  local message="$*"
  printf '[microagent-init] %s\n' "$message" >&2
}
mountpoint -q /proc || mount -t proc proc /proc mountpoint -q /proc || mount -t proc proc /proc
mountpoint -q /sys || mount -t sysfs sysfs /sys mountpoint -q /sys || mount -t sysfs sysfs /sys
@@ -9,32 +15,77 @@ mountpoint -q /run || mount -t tmpfs tmpfs /run
mkdir -p /tmp /var/tmp /run/sshd /var/log mkdir -p /tmp /var/tmp /run/sshd /var/log
chmod 1777 /tmp /var/tmp chmod 1777 /tmp /var/tmp
# PID 1 is the guest supervisor: it brings up the minimum runtime surface and cleanup() {
# stays alive to keep the VM services attached to a single lifecycle root. trap - INT TERM
[ -n "${rng_pid:-}" ] && kill "$rng_pid" >/dev/null 2>&1 || true
[ -n "${sshd_pid:-}" ] && kill "$sshd_pid" >/dev/null 2>&1 || true
[ -n "${desktop_pid:-}" ] && kill "$desktop_pid" >/dev/null 2>&1 || true
wait >/dev/null 2>&1 || true
exit 0
}
# Report whether a PID can currently be signalled.
# Arguments: $1 - PID to probe (optional)
# Returns:   0 when `kill -0` succeeds for the PID; non-zero when the
#            argument is empty/missing or the probe fails
pid_running() {
  local candidate="${1:-}"
  if [ -z "$candidate" ]; then
    return 1
  fi
  # Signal 0 performs the existence/permission check without delivering
  # anything to the target.
  kill -0 "$candidate" >/dev/null 2>&1
}
# Wait on a previously started background child, ignoring its exit status.
# Arguments: $1 - PID to wait on (optional; skipped when empty or missing)
# Returns:   always 0
reap_if_needed() {
  local finished="${1:-}"
  if [ -z "$finished" ]; then
    return 0
  fi
  # NOTE: `wait` blocks while the PID is still a running child of this shell;
  # failures (unknown PID, non-zero child status) are deliberately swallowed.
  wait "$finished" >/dev/null 2>&1 || true
}
# Launch (or relaunch) sshd in the background with -D -e.
# Globals:   sshd_pid (read, then overwritten with the new background PID)
# Outputs:   appends the daemon's stdout/stderr to /var/log/sshd.log
start_sshd() {
  local -a daemon
  local sink=/var/log/sshd.log
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${sshd_pid:-}"
  # The port in this message is not passed on the command line below;
  # presumably it comes from sshd's configuration — TODO confirm.
  log "starting sshd on 2222"
  daemon=(/usr/sbin/sshd -D -e)
  "${daemon[@]}" >>"$sink" 2>&1 &
  sshd_pid=$!
}
# Launch (or relaunch) the desktop session script in the background.
# Globals:   desktop_pid (read, then overwritten with the new background PID)
# Outputs:   appends the session's stdout/stderr to /var/log/desktop.log
start_desktop() {
  local -a session
  local sink=/var/log/desktop.log
  # Collect the previous instance, if one was recorded, before replacing it.
  reap_if_needed "${desktop_pid:-}"
  log "starting noVNC desktop on 6080"
  session=(/usr/local/bin/microagent-desktop-session)
  "${session[@]}" >>"$sink" 2>&1 &
  desktop_pid=$!
}
trap cleanup INT TERM
log "bringing up guest network"
if ! /usr/local/bin/microagent-network-up >/var/log/network.log 2>&1; then if ! /usr/local/bin/microagent-network-up >/var/log/network.log 2>&1; then
cat /var/log/network.log >&2 || true cat /var/log/network.log >&2 || true
exit 1 exit 1
fi fi
if [ ! -f /etc/ssh/ssh_host_ed25519_key ]; then if [ ! -f /etc/ssh/ssh_host_ed25519_key ]; then
log "generating ssh host keys"
ssh-keygen -A ssh-keygen -A
fi fi
if [ -f /etc/microagent/authorized_keys ]; then if [ -f /etc/microagent/authorized_keys ]; then
log "installing baked authorized_keys for node"
install -d -m 0700 -o node -g node /home/node/.ssh install -d -m 0700 -o node -g node /home/node/.ssh
install -m 0600 -o node -g node /etc/microagent/authorized_keys /home/node/.ssh/authorized_keys install -m 0600 -o node -g node /etc/microagent/authorized_keys /home/node/.ssh/authorized_keys
fi fi
if command -v jitterentropy-rngd >/dev/null 2>&1; then if command -v jitterentropy-rngd >/dev/null 2>&1; then
log "starting jitterentropy-rngd"
jitterentropy-rngd -v >/var/log/jitterentropy.log 2>&1 & jitterentropy-rngd -v >/var/log/jitterentropy.log 2>&1 &
rng_pid=$!
fi fi
/usr/sbin/sshd -D -e >/var/log/sshd.log 2>&1 & start_sshd
/usr/local/bin/microagent-desktop-session >/var/log/desktop.log 2>&1 & start_desktop
trap 'kill 0 || true; exit 0' INT TERM while true; do
wait -n if ! pid_running "${sshd_pid:-}"; then
status=$? log "sshd exited; restarting"
kill 0 || true start_sshd
wait || true fi
exit "$status" if ! pid_running "${desktop_pid:-}"; then
log "desktop session exited; restarting"
start_desktop
fi
sleep 1
done