host daemon (#2)

* feat: host daemon api scaffold

* fix: use sparse writes

* fix: unix socket length (<108 bytes)
This commit is contained in:
Hari 2026-04-08 11:23:19 -04:00 committed by GitHub
parent 4028bb5a1d
commit e2f9e54970
21 changed files with 2111 additions and 372 deletions

View file

@ -14,7 +14,7 @@ import (
const (
defaultCgroupVersion = "2"
defaultFirecrackerInitTimeout = 3 * time.Second
defaultFirecrackerInitTimeout = 10 * time.Second
defaultFirecrackerPollInterval = 10 * time.Millisecond
defaultRootDriveID = "root_drive"
defaultVSockRunDir = "/run"
@ -120,20 +120,34 @@ func waitForSocket(ctx context.Context, client *apiClient, socketPath string) er
ticker := time.NewTicker(defaultFirecrackerPollInterval)
defer ticker.Stop()
var lastStatErr error
var lastPingErr error
for {
select {
case <-waitContext.Done():
return waitContext.Err()
switch {
case lastPingErr != nil:
return fmt.Errorf("%w (socket=%q last_ping_err=%v)", waitContext.Err(), socketPath, lastPingErr)
case lastStatErr != nil:
return fmt.Errorf("%w (socket=%q last_stat_err=%v)", waitContext.Err(), socketPath, lastStatErr)
default:
return fmt.Errorf("%w (socket=%q)", waitContext.Err(), socketPath)
}
case <-ticker.C:
if _, err := os.Stat(socketPath); err != nil {
if os.IsNotExist(err) {
lastStatErr = err
continue
}
return fmt.Errorf("stat socket %q: %w", socketPath, err)
}
lastStatErr = nil
if err := client.Ping(waitContext); err != nil {
lastPingErr = err
continue
}
lastPingErr = nil
return nil
}
}

View file

@ -4,6 +4,7 @@ import (
"context"
"encoding/binary"
"fmt"
"net"
"net/netip"
"os/exec"
"strings"
@ -47,6 +48,30 @@ func (n NetworkAllocation) GuestIP() netip.Addr {
return n.GuestCIDR.Addr()
}
// AllocationFromGuestIP reconstructs the host-side allocation from a guest IP and tap name.
func AllocationFromGuestIP(guestIP string, tapName string) (NetworkAllocation, error) {
parsed := net.ParseIP(strings.TrimSpace(guestIP))
if parsed == nil {
return NetworkAllocation{}, fmt.Errorf("parse guest ip %q", guestIP)
}
addr, ok := netip.AddrFromSlice(parsed.To4())
if !ok {
return NetworkAllocation{}, fmt.Errorf("guest ip %q must be IPv4", guestIP)
}
base := ipv4ToUint32(addr) - 2
hostIP := uint32ToIPv4(base + 1)
guest := uint32ToIPv4(base + 2)
return NetworkAllocation{
InterfaceID: defaultInterfaceID,
TapName: strings.TrimSpace(tapName),
HostCIDR: netip.PrefixFrom(hostIP, defaultNetworkPrefixBits),
GuestCIDR: netip.PrefixFrom(guest, defaultNetworkPrefixBits),
GatewayIP: hostIP,
GuestMAC: macForIPv4(guest),
}, nil
}
// NewNetworkAllocator returns a new /30 allocator rooted at the provided IPv4 prefix.
func NewNetworkAllocator(cidr string) (*NetworkAllocator, error) {
cidr = strings.TrimSpace(cidr)

View file

@ -3,6 +3,7 @@ package firecracker
import (
"fmt"
"path/filepath"
"strconv"
"strings"
)
@ -45,3 +46,7 @@ func buildMachinePaths(rootDir string, id MachineID, firecrackerBinaryPath strin
SocketPath: filepath.Join(chrootRootDir, defaultFirecrackerSocketDir, defaultFirecrackerSocketName),
}, nil
}
func procSocketPath(pid int) string {
return filepath.Join("/proc", strconv.Itoa(pid), "root", defaultFirecrackerSocketDir, defaultFirecrackerSocketName)
}

View file

@ -8,7 +8,6 @@ import (
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
)
@ -27,26 +26,9 @@ type Runtime struct {
jailerBinaryPath string
networkAllocator *NetworkAllocator
networkProvisioner NetworkProvisioner
mu sync.RWMutex
machines map[MachineID]*managedMachine
}
type managedMachine struct {
cmd *exec.Cmd
entered bool
exited chan struct{}
network NetworkAllocation
paths machinePaths
spec MachineSpec
state MachineState
stopping bool
}
const (
defaultVSockCIDStart = uint32(3)
defaultVSockID = "vsock0"
)
const debugPreserveFailureEnv = "FIRECRACKER_DEBUG_PRESERVE_FAILURE"
func NewRuntime(cfg RuntimeConfig) (*Runtime, error) {
rootDir := filepath.Clean(strings.TrimSpace(cfg.RootDir))
@ -79,64 +61,28 @@ func NewRuntime(cfg RuntimeConfig) (*Runtime, error) {
jailerBinaryPath: jailerBinaryPath,
networkAllocator: allocator,
networkProvisioner: NewIPTapProvisioner(),
machines: make(map[MachineID]*managedMachine),
}, nil
}
func (r *Runtime) Boot(ctx context.Context, spec MachineSpec) (*MachineState, error) {
func (r *Runtime) Boot(ctx context.Context, spec MachineSpec, usedNetworks []NetworkAllocation) (*MachineState, error) {
if err := spec.Validate(); err != nil {
return nil, err
}
r.mu.Lock()
if _, exists := r.machines[spec.ID]; exists {
r.mu.Unlock()
return nil, fmt.Errorf("machine %q already exists", spec.ID)
}
usedNetworks := make([]NetworkAllocation, 0, len(r.machines))
usedVSockCIDs := make(map[uint32]struct{}, len(r.machines))
for _, machine := range r.machines {
if machine == nil {
continue
}
usedNetworks = append(usedNetworks, machine.network)
if machine.spec.Vsock != nil {
usedVSockCIDs[machine.spec.Vsock.CID] = struct{}{}
}
}
spec, err := r.resolveVSock(spec, usedVSockCIDs)
if err != nil {
r.mu.Unlock()
return nil, err
}
r.machines[spec.ID] = &managedMachine{
spec: spec,
state: MachineState{
ID: spec.ID,
Phase: PhaseProvisioning,
},
entered: true,
}
r.mu.Unlock()
cleanup := func(network NetworkAllocation, paths machinePaths, command *exec.Cmd) {
if preserveFailureArtifacts() {
fmt.Fprintf(os.Stderr, "firecracker debug: preserving failure artifacts machine=%s pid=%d socket=%s base=%s\n", spec.ID, pidOf(command), paths.SocketPath, paths.BaseDir)
return
}
cleanupStartedProcess(command)
_ = r.networkProvisioner.Remove(context.Background(), network)
_ = removeIfExists(hostVSockPath(paths, spec))
if paths.BaseDir != "" {
_ = os.RemoveAll(paths.BaseDir)
}
r.mu.Lock()
delete(r.machines, spec.ID)
r.mu.Unlock()
}
network, err := r.networkAllocator.Allocate(usedNetworks)
if err != nil {
cleanup(NetworkAllocation{}, machinePaths{}, nil)
return nil, err
}
@ -159,9 +105,14 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec) (*MachineState, er
cleanup(network, paths, nil)
return nil, err
}
socketPath := paths.SocketPath
if pid := pidOf(command); pid > 0 {
socketPath = procSocketPath(pid)
}
fmt.Fprintf(os.Stderr, "firecracker debug: launched machine=%s pid=%d socket=%s jailer_base=%s\n", spec.ID, pidOf(command), socketPath, paths.JailerBaseDir)
client := newAPIClient(paths.SocketPath)
if err := waitForSocket(ctx, client, paths.SocketPath); err != nil {
client := newAPIClient(socketPath)
if err := waitForSocket(ctx, client, socketPath); err != nil {
cleanup(network, paths, command)
return nil, fmt.Errorf("wait for firecracker socket: %w", err)
}
@ -187,162 +138,74 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec) (*MachineState, er
Phase: PhaseRunning,
PID: pid,
RuntimeHost: network.GuestIP().String(),
SocketPath: paths.SocketPath,
SocketPath: socketPath,
TapName: network.TapName,
StartedAt: &now,
}
r.mu.Lock()
entry := r.machines[spec.ID]
entry.cmd = command
entry.exited = make(chan struct{})
entry.network = network
entry.paths = paths
entry.state = state
r.mu.Unlock()
go r.watchMachine(spec.ID, command, entry.exited)
out := state
return &out, nil
}
func (r *Runtime) Inspect(id MachineID) (*MachineState, error) {
r.mu.RLock()
entry, ok := r.machines[id]
r.mu.RUnlock()
if !ok || entry == nil {
return nil, ErrMachineNotFound
}
state := entry.state
if state.PID > 0 && !processExists(state.PID) {
state.Phase = PhaseStopped
state.PID = 0
}
return &state, nil
}
func (r *Runtime) Stop(ctx context.Context, id MachineID) error {
r.mu.RLock()
entry, ok := r.machines[id]
r.mu.RUnlock()
if !ok || entry == nil {
return ErrMachineNotFound
func (r *Runtime) Inspect(state MachineState) (*MachineState, error) {
if state.PID > 0 && !processExists(state.PID) {
state.Phase = PhaseFailed
state.PID = 0
state.Error = "firecracker process not found"
}
if entry.cmd == nil || entry.cmd.Process == nil {
return fmt.Errorf("machine %q has no firecracker process", id)
}
if entry.state.Phase == PhaseStopped {
return &state, nil
}
func (r *Runtime) Stop(ctx context.Context, state MachineState) error {
if state.PID < 1 || !processExists(state.PID) {
return nil
}
r.mu.Lock()
entry.stopping = true
process := entry.cmd.Process
exited := entry.exited
r.mu.Unlock()
process, err := os.FindProcess(state.PID)
if err != nil {
return fmt.Errorf("find process for machine %q: %w", state.ID, err)
}
if err := process.Signal(syscall.SIGTERM); err != nil && !errors.Is(err, os.ErrProcessDone) {
return fmt.Errorf("stop machine %q: %w", id, err)
return fmt.Errorf("stop machine %q: %w", state.ID, err)
}
select {
case <-exited:
return nil
case <-ctx.Done():
return ctx.Err()
ticker := time.NewTicker(50 * time.Millisecond)
defer ticker.Stop()
for {
if !processExists(state.PID) {
return nil
}
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
}
}
}
func (r *Runtime) Delete(ctx context.Context, id MachineID) error {
r.mu.RLock()
entry, ok := r.machines[id]
r.mu.RUnlock()
if !ok || entry == nil {
return ErrMachineNotFound
func (r *Runtime) Delete(ctx context.Context, state MachineState) error {
if err := r.Stop(ctx, state); err != nil {
return err
}
if entry.state.Phase == PhaseRunning {
if err := r.Stop(ctx, id); err != nil && !errors.Is(err, context.Canceled) {
if strings.TrimSpace(state.RuntimeHost) != "" && strings.TrimSpace(state.TapName) != "" {
network, err := AllocationFromGuestIP(state.RuntimeHost, state.TapName)
if err != nil {
return err
}
if err := r.networkProvisioner.Remove(ctx, network); err != nil {
return err
}
}
if err := r.networkProvisioner.Remove(ctx, entry.network); err != nil {
return err
}
if err := removeIfExists(hostVSockPath(entry.paths, entry.spec)); err != nil {
return err
}
if err := os.RemoveAll(entry.paths.BaseDir); err != nil {
return fmt.Errorf("remove machine dir %q: %w", entry.paths.BaseDir, err)
}
r.mu.Lock()
delete(r.machines, id)
r.mu.Unlock()
paths, err := buildMachinePaths(r.rootDir, state.ID, r.firecrackerBinaryPath)
if err != nil {
return err
}
if err := os.RemoveAll(paths.BaseDir); err != nil {
return fmt.Errorf("remove machine dir %q: %w", paths.BaseDir, err)
}
return nil
}
func (r *Runtime) watchMachine(id MachineID, command *exec.Cmd, exited chan struct{}) {
err := command.Wait()
close(exited)
r.mu.Lock()
defer r.mu.Unlock()
entry, ok := r.machines[id]
if !ok || entry == nil || entry.cmd != command {
return
}
entry.state.PID = 0
if entry.stopping {
entry.state.Phase = PhaseStopped
entry.state.Error = ""
entry.stopping = false
return
}
if err != nil {
entry.state.Phase = PhaseError
entry.state.Error = err.Error()
return
}
entry.state.Phase = PhaseStopped
entry.state.Error = ""
}
func (r *Runtime) resolveVSock(spec MachineSpec, used map[uint32]struct{}) (MachineSpec, error) {
if spec.Vsock != nil {
if _, exists := used[spec.Vsock.CID]; exists {
return MachineSpec{}, fmt.Errorf("vsock cid %d already in use", spec.Vsock.CID)
}
return spec, nil
}
cid, err := nextVSockCID(used)
if err != nil {
return MachineSpec{}, err
}
spec.Vsock = &VsockSpec{
ID: defaultVSockID,
CID: cid,
Path: string(spec.ID) + ".sock",
}
return spec, nil
}
func nextVSockCID(used map[uint32]struct{}) (uint32, error) {
for cid := defaultVSockCIDStart; cid != 0; cid++ {
if _, exists := used[cid]; !exists {
return cid, nil
}
}
return 0, fmt.Errorf("vsock cid space exhausted")
}
func processExists(pid int) bool {
if pid < 1 {
return false
@ -351,12 +214,19 @@ func processExists(pid int) bool {
return err == nil || err == syscall.EPERM
}
func removeIfExists(path string) error {
if path == "" {
return nil
func pidOf(command *exec.Cmd) int {
if command == nil || command.Process == nil {
return 0
}
return command.Process.Pid
}
func preserveFailureArtifacts() bool {
value := strings.TrimSpace(os.Getenv(debugPreserveFailureEnv))
switch strings.ToLower(value) {
case "1", "true", "yes", "on":
return true
default:
return false
}
if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("remove path %q: %w", path, err)
}
return nil
}

View file

@ -18,14 +18,10 @@ type MachineState struct {
}
const (
// PhaseProvisioning means host-local resources are still being prepared.
PhaseProvisioning Phase = "provisioning"
// PhaseRunning means the Firecracker process is live.
PhaseRunning Phase = "running"
// PhaseStopped means the VM is no longer running.
PhaseStopped Phase = "stopped"
// PhaseMissing means the machine is not known to the runtime.
PhaseMissing Phase = "missing"
// PhaseError means the runtime observed a terminal failure.
PhaseError Phase = "error"
// PhaseFailed means the runtime observed a terminal failure.
PhaseFailed Phase = "failed"
)