package daemon import ( "context" "fmt" "net" "os" "sync" "time" appconfig "github.com/getcompanion-ai/computer-host/internal/config" "github.com/getcompanion-ai/computer-host/internal/firecracker" "github.com/getcompanion-ai/computer-host/internal/model" "github.com/getcompanion-ai/computer-host/internal/store" contracthost "github.com/getcompanion-ai/computer-host/contract" ) const ( defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=0" defaultGuestKernelArgsNoPCI = defaultGuestKernelArgs + " pci=off" defaultGuestMemoryMiB = int64(3072) defaultGuestVCPUs = int64(2) defaultGuestDiskSizeBytes = int64(10 * 1024 * 1024 * 1024) // 10 GB defaultSSHPort = uint16(2222) defaultVNCPort = uint16(6080) defaultGuestdPort = uint16(49983) defaultCopyBufferSize = 1024 * 1024 defaultGuestDialTimeout = 500 * time.Millisecond defaultGuestStopTimeout = 10 * time.Second ) type Runtime interface { Boot(context.Context, firecracker.MachineSpec, []firecracker.NetworkAllocation) (*firecracker.MachineState, error) Inspect(firecracker.MachineState) (*firecracker.MachineState, error) Delete(context.Context, firecracker.MachineState) error Pause(context.Context, firecracker.MachineState) error Resume(context.Context, firecracker.MachineState) error CreateSnapshot(context.Context, firecracker.MachineState, firecracker.SnapshotPaths) error RestoreBoot(context.Context, firecracker.SnapshotLoadSpec, []firecracker.NetworkAllocation) (*firecracker.MachineState, error) PutMMDS(context.Context, firecracker.MachineState, any) error } type Daemon struct { config appconfig.Config store store.Store runtime Runtime reconfigureGuestIdentity func(context.Context, string, contracthost.MachineID, *contracthost.GuestConfig) error readGuestSSHPublicKey func(context.Context, string) (string, error) injectMachineIdentity func(context.Context, string, contracthost.MachineID) error injectGuestConfig func(context.Context, string, *contracthost.GuestConfig) error syncGuestFilesystem func(context.Context, string) error shutdownGuest func(context.Context, string) error personalizeGuest func(context.Context, *model.MachineRecord, firecracker.MachineState) (*guestReadyResult, error) locksMu sync.Mutex machineLocks map[contracthost.MachineID]*sync.Mutex artifactLocks map[string]*sync.Mutex relayAllocMu sync.Mutex machineRelaysMu sync.Mutex machineRelayListeners map[string]net.Listener publishedPortAllocMu sync.Mutex publishedPortsMu sync.Mutex publishedPortListeners map[contracthost.PublishedPortID]net.Listener } func New(cfg appconfig.Config, store store.Store, runtime Runtime) (*Daemon, error) { if err := cfg.Validate(); err != nil { return nil, err } if store == nil { return nil, fmt.Errorf("store is required") } if runtime == nil { return nil, fmt.Errorf("runtime is required") } for _, dir := range []string{cfg.ArtifactsDir, cfg.MachineDisksDir, cfg.SnapshotsDir, cfg.RuntimeDir} { if err := os.MkdirAll(dir, 0o755); err != nil { return nil, fmt.Errorf("create daemon dir %q: %w", dir, err) } } if err := validateDiskCloneBackend(cfg); err != nil { return nil, err } daemon := &Daemon{ config: cfg, store: store, runtime: runtime, reconfigureGuestIdentity: nil, readGuestSSHPublicKey: nil, injectMachineIdentity: nil, injectGuestConfig: nil, personalizeGuest: nil, machineLocks: make(map[contracthost.MachineID]*sync.Mutex), artifactLocks: make(map[string]*sync.Mutex), machineRelayListeners: make(map[string]net.Listener), publishedPortListeners: make(map[contracthost.PublishedPortID]net.Listener), } daemon.reconfigureGuestIdentity = daemon.reconfigureGuestIdentityOverSSH daemon.readGuestSSHPublicKey = readGuestSSHPublicKey daemon.injectMachineIdentity = injectMachineIdentity daemon.injectGuestConfig = injectGuestConfig daemon.syncGuestFilesystem = daemon.syncGuestFilesystemOverSSH daemon.shutdownGuest = daemon.issueGuestPoweroff daemon.personalizeGuest = daemon.personalizeGuestConfig if err := daemon.ensureBackendSSHKeyPair(); err != nil { return nil, err } return daemon, nil } func (d *Daemon) Health(ctx context.Context) (*contracthost.HealthResponse, error) { if _, err := d.store.ListMachines(ctx); err != nil { return nil, err } return &contracthost.HealthResponse{OK: true}, nil } func (d *Daemon) lockMachine(machineID contracthost.MachineID) func() { lock := d.machineLock(machineID) lock.Lock() return lock.Unlock } func (d *Daemon) tryLockMachine(machineID contracthost.MachineID) (func(), bool) { lock := d.machineLock(machineID) if !lock.TryLock() { return nil, false } return lock.Unlock, true } func (d *Daemon) machineLock(machineID contracthost.MachineID) *sync.Mutex { d.locksMu.Lock() lock, ok := d.machineLocks[machineID] if !ok { lock = &sync.Mutex{} d.machineLocks[machineID] = lock } d.locksMu.Unlock() return lock } func (d *Daemon) lockArtifact(key string) func() { d.locksMu.Lock() lock, ok := d.artifactLocks[key] if !ok { lock = &sync.Mutex{} d.artifactLocks[key] = lock } d.locksMu.Unlock() lock.Lock() return lock.Unlock } func guestKernelArgs(enablePCI bool) string { if enablePCI { return defaultGuestKernelArgs } return defaultGuestKernelArgsNoPCI }