mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 06:04:38 +00:00
host daemon (#2)
* feat: host daemon api scaffold * fix: use sparse writes * fix: unix socket length (<108 bytes)
This commit is contained in:
parent
4028bb5a1d
commit
e2f9e54970
21 changed files with 2111 additions and 372 deletions
221
internal/daemon/create.go
Normal file
221
internal/daemon/create.go
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/model"
|
||||
"github.com/getcompanion-ai/computer-host/internal/store"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
// CreateMachine stages the requested artifact, provisions the system and user
// volumes, boots the VM through the runtime, and persists the machine record.
// The operation journal entry is upserted first and deleted only on success,
// so a crash mid-create leaves a journal entry behind for recovery. Every
// failure after a side effect rolls that side effect back in reverse order.
func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachineRequest) (*contracthost.CreateMachineResponse, error) {
	if err := validateMachineID(req.MachineID); err != nil {
		return nil, err
	}
	if err := validateArtifactRef(req.Artifact); err != nil {
		return nil, err
	}

	// Serialize all operations on this machine ID.
	unlock := d.lockMachine(req.MachineID)
	defer unlock()

	// A nil error from GetMachine means the machine already exists.
	if _, err := d.store.GetMachine(ctx, req.MachineID); err == nil {
		return nil, fmt.Errorf("machine %q already exists", req.MachineID)
	} else if err != nil && err != store.ErrNotFound {
		return nil, err
	}

	// Journal the create before doing any work so an interrupted create can
	// be detected later.
	if err := d.store.UpsertOperation(ctx, model.OperationRecord{
		MachineID: req.MachineID,
		Type:      model.MachineOperationCreate,
		StartedAt: time.Now().UTC(),
	}); err != nil {
		return nil, err
	}

	// The journal entry is only cleared on full success; context.Background()
	// is used so cleanup still runs if ctx was cancelled.
	clearOperation := false
	defer func() {
		if clearOperation {
			_ = d.store.DeleteOperation(context.Background(), req.MachineID)
		}
	}()

	artifact, err := d.ensureArtifact(ctx, req.Artifact)
	if err != nil {
		return nil, err
	}

	userVolumes, err := d.loadAttachableUserVolumes(ctx, req.MachineID, req.UserVolumeIDs)
	if err != nil {
		return nil, err
	}

	// Clone the artifact rootfs into a per-machine system disk.
	systemVolumePath := d.systemVolumePath(req.MachineID)
	if err := os.MkdirAll(filepath.Dir(systemVolumePath), 0o755); err != nil {
		return nil, fmt.Errorf("create system volume dir for %q: %w", req.MachineID, err)
	}
	if err := cloneFile(artifact.RootFSPath, systemVolumePath); err != nil {
		return nil, err
	}

	spec, err := d.buildMachineSpec(req.MachineID, artifact, userVolumes, systemVolumePath)
	if err != nil {
		return nil, err
	}
	// Collect networks in use so the runtime can allocate a free one.
	usedNetworks, err := d.listRunningNetworks(ctx, req.MachineID)
	if err != nil {
		return nil, err
	}

	state, err := d.runtime.Boot(ctx, spec, usedNetworks)
	if err != nil {
		return nil, err
	}

	now := time.Now().UTC()
	systemVolumeRecord := model.VolumeRecord{
		ID:                d.systemVolumeID(req.MachineID),
		Kind:              contracthost.VolumeKindSystem,
		AttachedMachineID: machineIDPtr(req.MachineID),
		SourceArtifact:    &req.Artifact,
		Pool:              model.StoragePoolMachineDisks,
		Path:              systemVolumePath,
		CreatedAt:         now,
	}
	if err := d.store.CreateVolume(ctx, systemVolumeRecord); err != nil {
		// Roll back: tear down the already-booted VM.
		_ = d.runtime.Delete(context.Background(), *state)
		return nil, err
	}

	// Mark each user volume as attached; on failure, detach the ones already
	// attached, drop the system volume record, and tear down the VM.
	attachedUserVolumeIDs := make([]contracthost.VolumeID, 0, len(userVolumes))
	for _, volume := range userVolumes {
		volume.AttachedMachineID = machineIDPtr(req.MachineID)
		if err := d.store.UpdateVolume(ctx, volume); err != nil {
			for _, attachedVolumeID := range attachedUserVolumeIDs {
				attachedVolume, getErr := d.store.GetVolume(context.Background(), attachedVolumeID)
				if getErr == nil {
					attachedVolume.AttachedMachineID = nil
					_ = d.store.UpdateVolume(context.Background(), *attachedVolume)
				}
			}
			_ = d.store.DeleteVolume(context.Background(), systemVolumeRecord.ID)
			_ = d.runtime.Delete(context.Background(), *state)
			return nil, err
		}
		attachedUserVolumeIDs = append(attachedUserVolumeIDs, volume.ID)
	}

	record := model.MachineRecord{
		ID:             req.MachineID,
		Artifact:       req.Artifact,
		SystemVolumeID: systemVolumeRecord.ID,
		UserVolumeIDs:  append([]contracthost.VolumeID(nil), attachedUserVolumeIDs...),
		RuntimeHost:    state.RuntimeHost,
		TapDevice:      state.TapName,
		Ports:          defaultMachinePorts(),
		Phase:          contracthost.MachinePhaseRunning,
		PID:            state.PID,
		SocketPath:     state.SocketPath,
		CreatedAt:      now,
		StartedAt:      state.StartedAt,
	}
	if err := d.store.CreateMachine(ctx, record); err != nil {
		// Full rollback: detach user volumes, drop the system volume record,
		// and tear down the VM.
		for _, volume := range userVolumes {
			volume.AttachedMachineID = nil
			_ = d.store.UpdateVolume(context.Background(), volume)
		}
		_ = d.store.DeleteVolume(context.Background(), systemVolumeRecord.ID)
		_ = d.runtime.Delete(context.Background(), *state)
		return nil, err
	}

	clearOperation = true
	return &contracthost.CreateMachineResponse{Machine: machineToContract(record)}, nil
}
|
||||
|
||||
func (d *Daemon) buildMachineSpec(machineID contracthost.MachineID, artifact *model.ArtifactRecord, userVolumes []model.VolumeRecord, systemVolumePath string) (firecracker.MachineSpec, error) {
|
||||
drives := make([]firecracker.DriveSpec, 0, len(userVolumes))
|
||||
for i, volume := range userVolumes {
|
||||
drives = append(drives, firecracker.DriveSpec{
|
||||
ID: fmt.Sprintf("user-%d", i),
|
||||
Path: volume.Path,
|
||||
ReadOnly: false,
|
||||
})
|
||||
}
|
||||
|
||||
spec := firecracker.MachineSpec{
|
||||
ID: firecracker.MachineID(machineID),
|
||||
VCPUs: defaultGuestVCPUs,
|
||||
MemoryMiB: defaultGuestMemoryMiB,
|
||||
KernelImagePath: artifact.KernelImagePath,
|
||||
RootFSPath: systemVolumePath,
|
||||
KernelArgs: defaultGuestKernelArgs,
|
||||
Drives: drives,
|
||||
}
|
||||
if err := spec.Validate(); err != nil {
|
||||
return firecracker.MachineSpec{}, err
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureArtifact(ctx context.Context, ref contracthost.ArtifactRef) (*model.ArtifactRecord, error) {
|
||||
key := artifactKey(ref)
|
||||
unlock := d.lockArtifact(key)
|
||||
defer unlock()
|
||||
|
||||
if artifact, err := d.store.GetArtifact(ctx, ref); err == nil {
|
||||
return artifact, nil
|
||||
} else if err != store.ErrNotFound {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dir := filepath.Join(d.config.ArtifactsDir, key)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("create artifact dir %q: %w", dir, err)
|
||||
}
|
||||
|
||||
kernelPath := filepath.Join(dir, "kernel")
|
||||
rootFSPath := filepath.Join(dir, "rootfs")
|
||||
if err := downloadFile(ctx, ref.KernelImageURL, kernelPath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := downloadFile(ctx, ref.RootFSURL, rootFSPath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
artifact := model.ArtifactRecord{
|
||||
Ref: ref,
|
||||
LocalKey: key,
|
||||
LocalDir: dir,
|
||||
KernelImagePath: kernelPath,
|
||||
RootFSPath: rootFSPath,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
}
|
||||
if err := d.store.PutArtifact(ctx, artifact); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &artifact, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) loadAttachableUserVolumes(ctx context.Context, machineID contracthost.MachineID, volumeIDs []contracthost.VolumeID) ([]model.VolumeRecord, error) {
|
||||
volumes := make([]model.VolumeRecord, 0, len(volumeIDs))
|
||||
for _, volumeID := range volumeIDs {
|
||||
volume, err := d.store.GetVolume(ctx, volumeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if volume.Kind != contracthost.VolumeKindUser {
|
||||
return nil, fmt.Errorf("volume %q is not a user volume", volumeID)
|
||||
}
|
||||
if volume.AttachedMachineID != nil && *volume.AttachedMachineID != machineID {
|
||||
return nil, fmt.Errorf("volume %q is already attached to machine %q", volumeID, *volume.AttachedMachineID)
|
||||
}
|
||||
volumes = append(volumes, *volume)
|
||||
}
|
||||
return volumes, nil
|
||||
}
|
||||
|
|
@ -1,12 +1,95 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/store"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
type Runtime interface{}
|
||||
const (
|
||||
defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=1 pci=off"
|
||||
defaultGuestMemoryMiB = int64(512)
|
||||
defaultGuestVCPUs = int64(1)
|
||||
defaultSSHPort = uint16(2222)
|
||||
defaultVNCPort = uint16(6080)
|
||||
defaultCopyBufferSize = 1024 * 1024
|
||||
)
|
||||
|
||||
type Runtime interface {
|
||||
Boot(context.Context, firecracker.MachineSpec, []firecracker.NetworkAllocation) (*firecracker.MachineState, error)
|
||||
Inspect(firecracker.MachineState) (*firecracker.MachineState, error)
|
||||
Delete(context.Context, firecracker.MachineState) error
|
||||
}
|
||||
|
||||
// Daemon implements the host API: machine lifecycle, artifact staging, and
// volume bookkeeping, backed by a persistent store and a VM runtime.
type Daemon struct {
	// NOTE(review): Store and Runtime look like leftovers from an earlier
	// revision — New never assigns them and every method uses the unexported
	// fields below. Confirm no caller reads them, then remove.
	Store   store.Store
	Runtime Runtime

	config  appconfig.Config // validated daemon configuration
	store   store.Store      // persistent machine/volume/artifact state
	runtime Runtime          // VM lifecycle backend

	// locksMu guards the two lock maps below; the per-machine and
	// per-artifact mutexes serialize operations on the same resource.
	locksMu       sync.Mutex
	machineLocks  map[contracthost.MachineID]*sync.Mutex
	artifactLocks map[string]*sync.Mutex
}
|
||||
|
||||
func New(cfg appconfig.Config, store store.Store, runtime Runtime) (*Daemon, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if store == nil {
|
||||
return nil, fmt.Errorf("store is required")
|
||||
}
|
||||
if runtime == nil {
|
||||
return nil, fmt.Errorf("runtime is required")
|
||||
}
|
||||
for _, dir := range []string{cfg.ArtifactsDir, cfg.MachineDisksDir, cfg.RuntimeDir} {
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("create daemon dir %q: %w", dir, err)
|
||||
}
|
||||
}
|
||||
return &Daemon{
|
||||
config: cfg,
|
||||
store: store,
|
||||
runtime: runtime,
|
||||
machineLocks: make(map[contracthost.MachineID]*sync.Mutex),
|
||||
artifactLocks: make(map[string]*sync.Mutex),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) Health(ctx context.Context) (*contracthost.HealthResponse, error) {
|
||||
if _, err := d.store.ListMachines(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &contracthost.HealthResponse{OK: true}, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) lockMachine(machineID contracthost.MachineID) func() {
|
||||
d.locksMu.Lock()
|
||||
lock, ok := d.machineLocks[machineID]
|
||||
if !ok {
|
||||
lock = &sync.Mutex{}
|
||||
d.machineLocks[machineID] = lock
|
||||
}
|
||||
d.locksMu.Unlock()
|
||||
|
||||
lock.Lock()
|
||||
return lock.Unlock
|
||||
}
|
||||
|
||||
func (d *Daemon) lockArtifact(key string) func() {
|
||||
d.locksMu.Lock()
|
||||
lock, ok := d.artifactLocks[key]
|
||||
if !ok {
|
||||
lock = &sync.Mutex{}
|
||||
d.artifactLocks[key] = lock
|
||||
}
|
||||
d.locksMu.Unlock()
|
||||
|
||||
lock.Lock()
|
||||
return lock.Unlock
|
||||
}
|
||||
|
|
|
|||
211
internal/daemon/daemon_test.go
Normal file
211
internal/daemon/daemon_test.go
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/store"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
// fakeRuntime is a test double for the daemon's Runtime interface. It records
// interactions so tests can assert call counts and the specs passed to Boot.
type fakeRuntime struct {
	bootState   firecracker.MachineState   // state returned (by copy) from Boot
	bootCalls   int                        // number of Boot invocations
	deleteCalls []firecracker.MachineState // states passed to Delete, in order
	lastSpec    firecracker.MachineSpec    // most recent spec given to Boot
}
|
||||
|
||||
func (f *fakeRuntime) Boot(_ context.Context, spec firecracker.MachineSpec, _ []firecracker.NetworkAllocation) (*firecracker.MachineState, error) {
|
||||
f.bootCalls++
|
||||
f.lastSpec = spec
|
||||
state := f.bootState
|
||||
return &state, nil
|
||||
}
|
||||
|
||||
func (f *fakeRuntime) Inspect(state firecracker.MachineState) (*firecracker.MachineState, error) {
|
||||
copy := state
|
||||
return ©, nil
|
||||
}
|
||||
|
||||
func (f *fakeRuntime) Delete(_ context.Context, state firecracker.MachineState) error {
|
||||
f.deleteCalls = append(f.deleteCalls, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestCreateMachineStagesArtifactsAndPersistsState is the happy-path
// end-to-end test: artifacts are downloaded from a local HTTP server, the
// system disk is cloned, the fake runtime boots exactly once, and machine,
// artifact, and volume state is persisted with an empty operation journal.
func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	cfg := testConfig(root)
	fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
	if err != nil {
		t.Fatalf("create file store: %v", err)
	}

	startedAt := time.Unix(1700000005, 0).UTC()
	runtime := &fakeRuntime{
		bootState: firecracker.MachineState{
			ID:          "vm-1",
			Phase:       firecracker.PhaseRunning,
			PID:         4321,
			RuntimeHost: "172.16.0.2",
			SocketPath:  filepath.Join(cfg.RuntimeDir, "machines", "vm-1", "root", "run", "firecracker.sock"),
			TapName:     "fctap0",
			StartedAt:   &startedAt,
		},
	}

	hostDaemon, err := New(cfg, fileStore, runtime)
	if err != nil {
		t.Fatalf("create daemon: %v", err)
	}

	// Serve the kernel and rootfs payloads from a local server so the
	// download path is exercised without touching the network.
	kernelPayload := []byte("kernel-image")
	rootFSPayload := []byte("rootfs-image")
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/kernel":
			_, _ = w.Write(kernelPayload)
		case "/rootfs":
			_, _ = w.Write(rootFSPayload)
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	response, err := hostDaemon.CreateMachine(context.Background(), contracthost.CreateMachineRequest{
		MachineID: "vm-1",
		Artifact: contracthost.ArtifactRef{
			KernelImageURL: server.URL + "/kernel",
			RootFSURL:      server.URL + "/rootfs",
		},
	})
	if err != nil {
		t.Fatalf("create machine: %v", err)
	}

	if response.Machine.Phase != contracthost.MachinePhaseRunning {
		t.Fatalf("machine phase mismatch: got %q", response.Machine.Phase)
	}
	if response.Machine.RuntimeHost != "172.16.0.2" {
		t.Fatalf("runtime host mismatch: got %q", response.Machine.RuntimeHost)
	}
	if len(response.Machine.Ports) != 2 {
		t.Fatalf("machine ports mismatch: got %d want 2", len(response.Machine.Ports))
	}
	if runtime.bootCalls != 1 {
		t.Fatalf("boot call count mismatch: got %d want 1", runtime.bootCalls)
	}
	if runtime.lastSpec.KernelImagePath == "" || runtime.lastSpec.RootFSPath == "" {
		t.Fatalf("runtime spec paths not populated: %#v", runtime.lastSpec)
	}
	// The staged files must exist on disk, not just be recorded in state.
	if _, err := os.Stat(runtime.lastSpec.KernelImagePath); err != nil {
		t.Fatalf("kernel artifact not staged: %v", err)
	}
	if _, err := os.Stat(runtime.lastSpec.RootFSPath); err != nil {
		t.Fatalf("system disk not staged: %v", err)
	}

	artifact, err := fileStore.GetArtifact(context.Background(), response.Machine.Artifact)
	if err != nil {
		t.Fatalf("get artifact: %v", err)
	}
	if artifact.KernelImagePath == "" || artifact.RootFSPath == "" {
		t.Fatalf("artifact paths missing: %#v", artifact)
	}
	if payload, err := os.ReadFile(artifact.KernelImagePath); err != nil {
		t.Fatalf("read kernel artifact: %v", err)
	} else if string(payload) != string(kernelPayload) {
		t.Fatalf("kernel artifact payload mismatch: got %q", string(payload))
	}

	machine, err := fileStore.GetMachine(context.Background(), "vm-1")
	if err != nil {
		t.Fatalf("get machine: %v", err)
	}
	if machine.SystemVolumeID != "vm-1-system" {
		t.Fatalf("system volume mismatch: got %q", machine.SystemVolumeID)
	}

	// A successful create must clear its entry from the operation journal.
	operations, err := fileStore.ListOperations(context.Background())
	if err != nil {
		t.Fatalf("list operations: %v", err)
	}
	if len(operations) != 0 {
		t.Fatalf("operation journal should be empty after success: got %d entries", len(operations))
	}
}
|
||||
|
||||
func TestCreateMachineRejectsNonHTTPArtifactURLs(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
root := t.TempDir()
|
||||
cfg := testConfig(root)
|
||||
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
|
||||
if err != nil {
|
||||
t.Fatalf("create file store: %v", err)
|
||||
}
|
||||
hostDaemon, err := New(cfg, fileStore, &fakeRuntime{})
|
||||
if err != nil {
|
||||
t.Fatalf("create daemon: %v", err)
|
||||
}
|
||||
|
||||
_, err = hostDaemon.CreateMachine(context.Background(), contracthost.CreateMachineRequest{
|
||||
MachineID: "vm-1",
|
||||
Artifact: contracthost.ArtifactRef{
|
||||
KernelImageURL: "file:///kernel",
|
||||
RootFSURL: "https://example.com/rootfs",
|
||||
},
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected create machine to fail for non-http artifact url")
|
||||
}
|
||||
if got := err.Error(); got != "artifact.kernel_image_url must use http or https" {
|
||||
t.Fatalf("unexpected error: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteMachineMissingIsNoOp(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
root := t.TempDir()
|
||||
cfg := testConfig(root)
|
||||
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
|
||||
if err != nil {
|
||||
t.Fatalf("create file store: %v", err)
|
||||
}
|
||||
runtime := &fakeRuntime{}
|
||||
hostDaemon, err := New(cfg, fileStore, runtime)
|
||||
if err != nil {
|
||||
t.Fatalf("create daemon: %v", err)
|
||||
}
|
||||
|
||||
if err := hostDaemon.DeleteMachine(context.Background(), "missing"); err != nil {
|
||||
t.Fatalf("delete missing machine: %v", err)
|
||||
}
|
||||
if len(runtime.deleteCalls) != 0 {
|
||||
t.Fatalf("delete runtime should not be called for missing machine")
|
||||
}
|
||||
}
|
||||
|
||||
func testConfig(root string) appconfig.Config {
|
||||
return appconfig.Config{
|
||||
RootDir: root,
|
||||
StatePath: filepath.Join(root, "state", "state.json"),
|
||||
OperationsPath: filepath.Join(root, "state", "ops.json"),
|
||||
ArtifactsDir: filepath.Join(root, "artifacts"),
|
||||
MachineDisksDir: filepath.Join(root, "machine-disks"),
|
||||
RuntimeDir: filepath.Join(root, "runtime"),
|
||||
SocketPath: filepath.Join(root, "firecracker-host.sock"),
|
||||
FirecrackerBinaryPath: "/usr/bin/firecracker",
|
||||
JailerBinaryPath: "/usr/bin/jailer",
|
||||
}
|
||||
}
|
||||
274
internal/daemon/files.go
Normal file
274
internal/daemon/files.go
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/model"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
func (d *Daemon) systemVolumeID(machineID contracthost.MachineID) contracthost.VolumeID {
|
||||
return contracthost.VolumeID(fmt.Sprintf("%s-system", machineID))
|
||||
}
|
||||
|
||||
func (d *Daemon) systemVolumePath(machineID contracthost.MachineID) string {
|
||||
return filepath.Join(d.config.MachineDisksDir, string(machineID), "system.img")
|
||||
}
|
||||
|
||||
func (d *Daemon) machineRuntimeBaseDir(machineID contracthost.MachineID) string {
|
||||
return filepath.Join(d.config.RuntimeDir, "machines", string(machineID))
|
||||
}
|
||||
|
||||
func artifactKey(ref contracthost.ArtifactRef) string {
|
||||
sum := sha256.Sum256([]byte(ref.KernelImageURL + "\n" + ref.RootFSURL))
|
||||
return hex.EncodeToString(sum[:])
|
||||
}
|
||||
|
||||
func cloneFile(source string, target string) error {
|
||||
if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
|
||||
return fmt.Errorf("create target dir for %q: %w", target, err)
|
||||
}
|
||||
|
||||
sourceFile, err := os.Open(source)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open source file %q: %w", source, err)
|
||||
}
|
||||
defer sourceFile.Close()
|
||||
|
||||
sourceInfo, err := sourceFile.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat source file %q: %w", source, err)
|
||||
}
|
||||
|
||||
tmpPath := target + ".tmp"
|
||||
targetFile, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open target file %q: %w", tmpPath, err)
|
||||
}
|
||||
|
||||
if _, err := writeSparseFile(targetFile, sourceFile); err != nil {
|
||||
targetFile.Close()
|
||||
return fmt.Errorf("copy %q to %q: %w", source, tmpPath, err)
|
||||
}
|
||||
if err := targetFile.Truncate(sourceInfo.Size()); err != nil {
|
||||
targetFile.Close()
|
||||
return fmt.Errorf("truncate target file %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := targetFile.Sync(); err != nil {
|
||||
targetFile.Close()
|
||||
return fmt.Errorf("sync target file %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := targetFile.Close(); err != nil {
|
||||
return fmt.Errorf("close target file %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := os.Rename(tmpPath, target); err != nil {
|
||||
return fmt.Errorf("rename target file %q to %q: %w", tmpPath, target, err)
|
||||
}
|
||||
if err := syncDir(filepath.Dir(target)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func downloadFile(ctx context.Context, rawURL string, path string) error {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
} else if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("stat download target %q: %w", path, err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return fmt.Errorf("create download dir for %q: %w", path, err)
|
||||
}
|
||||
|
||||
request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build download request for %q: %w", rawURL, err)
|
||||
}
|
||||
response, err := http.DefaultClient.Do(request)
|
||||
if err != nil {
|
||||
return fmt.Errorf("download %q: %w", rawURL, err)
|
||||
}
|
||||
defer response.Body.Close()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("download %q: status %d", rawURL, response.StatusCode)
|
||||
}
|
||||
|
||||
tmpPath := path + ".tmp"
|
||||
file, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open download target %q: %w", tmpPath, err)
|
||||
}
|
||||
|
||||
size, err := writeSparseFile(file, response.Body)
|
||||
if err != nil {
|
||||
file.Close()
|
||||
return fmt.Errorf("write download target %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := file.Truncate(size); err != nil {
|
||||
file.Close()
|
||||
return fmt.Errorf("truncate download target %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := file.Sync(); err != nil {
|
||||
file.Close()
|
||||
return fmt.Errorf("sync download target %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("close download target %q: %w", tmpPath, err)
|
||||
}
|
||||
if err := os.Rename(tmpPath, path); err != nil {
|
||||
return fmt.Errorf("rename download target %q to %q: %w", tmpPath, path, err)
|
||||
}
|
||||
if err := syncDir(filepath.Dir(path)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeSparseFile(targetFile *os.File, source io.Reader) (int64, error) {
|
||||
buffer := make([]byte, defaultCopyBufferSize)
|
||||
var size int64
|
||||
|
||||
for {
|
||||
count, err := source.Read(buffer)
|
||||
if count > 0 {
|
||||
chunk := buffer[:count]
|
||||
if isZeroChunk(chunk) {
|
||||
if _, seekErr := targetFile.Seek(int64(count), io.SeekCurrent); seekErr != nil {
|
||||
return size, seekErr
|
||||
}
|
||||
} else {
|
||||
if _, writeErr := targetFile.Write(chunk); writeErr != nil {
|
||||
return size, writeErr
|
||||
}
|
||||
}
|
||||
size += int64(count)
|
||||
}
|
||||
if err == nil {
|
||||
continue
|
||||
}
|
||||
if err == io.EOF {
|
||||
return size, nil
|
||||
}
|
||||
return size, err
|
||||
}
|
||||
}
|
||||
|
||||
// isZeroChunk reports whether every byte in chunk is zero. An empty chunk
// is considered zero.
func isZeroChunk(chunk []byte) bool {
	for i := range chunk {
		if chunk[i] != 0 {
			return false
		}
	}
	return true
}
|
||||
|
||||
func defaultMachinePorts() []contracthost.MachinePort {
|
||||
return []contracthost.MachinePort{
|
||||
{Name: contracthost.MachinePortNameSSH, Port: defaultSSHPort, Protocol: contracthost.PortProtocolTCP},
|
||||
{Name: contracthost.MachinePortNameVNC, Port: defaultVNCPort, Protocol: contracthost.PortProtocolTCP},
|
||||
}
|
||||
}
|
||||
|
||||
func machineIDPtr(machineID contracthost.MachineID) *contracthost.MachineID {
|
||||
value := machineID
|
||||
return &value
|
||||
}
|
||||
|
||||
func machineToContract(record model.MachineRecord) contracthost.Machine {
|
||||
return contracthost.Machine{
|
||||
ID: record.ID,
|
||||
Artifact: record.Artifact,
|
||||
SystemVolumeID: record.SystemVolumeID,
|
||||
UserVolumeIDs: append([]contracthost.VolumeID(nil), record.UserVolumeIDs...),
|
||||
RuntimeHost: record.RuntimeHost,
|
||||
Ports: append([]contracthost.MachinePort(nil), record.Ports...),
|
||||
Phase: record.Phase,
|
||||
Error: record.Error,
|
||||
CreatedAt: record.CreatedAt,
|
||||
StartedAt: record.StartedAt,
|
||||
}
|
||||
}
|
||||
|
||||
func machineToRuntimeState(record model.MachineRecord) firecracker.MachineState {
|
||||
phase := firecracker.PhaseStopped
|
||||
switch record.Phase {
|
||||
case contracthost.MachinePhaseRunning:
|
||||
phase = firecracker.PhaseRunning
|
||||
case contracthost.MachinePhaseFailed:
|
||||
phase = firecracker.PhaseFailed
|
||||
}
|
||||
return firecracker.MachineState{
|
||||
ID: firecracker.MachineID(record.ID),
|
||||
Phase: phase,
|
||||
PID: record.PID,
|
||||
RuntimeHost: record.RuntimeHost,
|
||||
SocketPath: record.SocketPath,
|
||||
TapName: record.TapDevice,
|
||||
StartedAt: record.StartedAt,
|
||||
Error: record.Error,
|
||||
}
|
||||
}
|
||||
|
||||
func validateArtifactRef(ref contracthost.ArtifactRef) error {
|
||||
if err := validateDownloadURL("artifact.kernel_image_url", ref.KernelImageURL); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateDownloadURL("artifact.rootfs_url", ref.RootFSURL); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateMachineID(machineID contracthost.MachineID) error {
|
||||
value := strings.TrimSpace(string(machineID))
|
||||
if value == "" {
|
||||
return fmt.Errorf("machine_id is required")
|
||||
}
|
||||
if filepath.Base(value) != value {
|
||||
return fmt.Errorf("machine_id %q must not contain path separators", machineID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateDownloadURL checks that raw is a non-empty, parseable http or https
// URL with a host. field names the request field in error messages.
func validateDownloadURL(field string, raw string) error {
	value := strings.TrimSpace(raw)
	if value == "" {
		return fmt.Errorf("%s is required", field)
	}
	parsed, err := url.Parse(value)
	if err != nil {
		return fmt.Errorf("%s is invalid: %w", field, err)
	}
	switch parsed.Scheme {
	case "http", "https":
		// Acceptable schemes for artifact downloads.
	default:
		return fmt.Errorf("%s must use http or https", field)
	}
	if strings.TrimSpace(parsed.Host) == "" {
		return fmt.Errorf("%s host is required", field)
	}
	return nil
}
|
||||
|
||||
// syncDir fsyncs the directory at path so metadata changes (renames, new
// entries) inside it are durable.
func syncDir(path string) error {
	handle, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("open dir %q: %w", path, err)
	}
	if syncErr := handle.Sync(); syncErr != nil {
		handle.Close()
		return fmt.Errorf("sync dir %q: %w", path, syncErr)
	}
	if closeErr := handle.Close(); closeErr != nil {
		return fmt.Errorf("close dir %q: %w", path, closeErr)
	}
	return nil
}
|
||||
94
internal/daemon/files_test.go
Normal file
94
internal/daemon/files_test.go
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestCloneFilePreservesSparseDiskUsage writes a ~32 MiB source file with a
// large hole in the middle, clones it, and verifies the clone keeps the same
// logical size and contents while allocating far fewer blocks on disk.
func TestCloneFilePreservesSparseDiskUsage(t *testing.T) {
	root := t.TempDir()
	sourcePath := filepath.Join(root, "source.img")
	targetPath := filepath.Join(root, "target.img")

	// Build the sparse source: 4 bytes, a ~32 MiB hole, then 4 more bytes.
	sourceFile, err := os.OpenFile(sourcePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		t.Fatalf("open source file: %v", err)
	}
	if _, err := sourceFile.Write([]byte("head")); err != nil {
		sourceFile.Close()
		t.Fatalf("write source prefix: %v", err)
	}
	if _, err := sourceFile.Seek(32<<20, io.SeekStart); err != nil {
		sourceFile.Close()
		t.Fatalf("seek source hole: %v", err)
	}
	if _, err := sourceFile.Write([]byte("tail")); err != nil {
		sourceFile.Close()
		t.Fatalf("write source suffix: %v", err)
	}
	if err := sourceFile.Close(); err != nil {
		t.Fatalf("close source file: %v", err)
	}

	sourceInfo, err := os.Stat(sourcePath)
	if err != nil {
		t.Fatalf("stat source file: %v", err)
	}
	sourceUsage, err := allocatedBytes(sourcePath)
	if err != nil {
		t.Fatalf("allocated bytes for source: %v", err)
	}
	// If the filesystem materialized the hole, sparseness cannot be observed
	// at all here; skip rather than fail.
	if sourceUsage >= sourceInfo.Size()/2 {
		t.Skip("temp filesystem does not expose sparse allocation savings")
	}

	if err := cloneFile(sourcePath, targetPath); err != nil {
		t.Fatalf("clone sparse file: %v", err)
	}

	// Logical size must be preserved exactly.
	targetInfo, err := os.Stat(targetPath)
	if err != nil {
		t.Fatalf("stat target file: %v", err)
	}
	if targetInfo.Size() != sourceInfo.Size() {
		t.Fatalf("target size mismatch: got %d want %d", targetInfo.Size(), sourceInfo.Size())
	}

	// Physical allocation must stay well below the logical size.
	targetUsage, err := allocatedBytes(targetPath)
	if err != nil {
		t.Fatalf("allocated bytes for target: %v", err)
	}
	if targetUsage >= targetInfo.Size()/2 {
		t.Fatalf("target file is not sparse enough: allocated=%d size=%d", targetUsage, targetInfo.Size())
	}

	targetData, err := os.ReadFile(targetPath)
	if err != nil {
		t.Fatalf("read target file: %v", err)
	}
	if !bytes.Equal(targetData[:4], []byte("head")) {
		t.Fatalf("target prefix mismatch: %q", string(targetData[:4]))
	}
	if !bytes.Equal(targetData[len(targetData)-4:], []byte("tail")) {
		t.Fatalf("target suffix mismatch: %q", string(targetData[len(targetData)-4:]))
	}
	// Spot-check the first MiB of the hole region reads back as zeros.
	if !bytes.Equal(targetData[4:4+(1<<20)], make([]byte, 1<<20)) {
		t.Fatal("target hole contents were not zeroed")
	}
}
|
||||
|
||||
func allocatedBytes(path string) (int64, error) {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
stat, ok := info.Sys().(*syscall.Stat_t)
|
||||
if !ok {
|
||||
return 0, syscall.EINVAL
|
||||
}
|
||||
return stat.Blocks * 512, nil
|
||||
}
|
||||
327
internal/daemon/lifecycle.go
Normal file
327
internal/daemon/lifecycle.go
Normal file
|
|
@ -0,0 +1,327 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/model"
|
||||
"github.com/getcompanion-ai/computer-host/internal/store"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
func (d *Daemon) GetMachine(ctx context.Context, id contracthost.MachineID) (*contracthost.GetMachineResponse, error) {
|
||||
record, err := d.reconcileMachine(ctx, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &contracthost.GetMachineResponse{Machine: machineToContract(*record)}, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ListMachines(ctx context.Context) (*contracthost.ListMachinesResponse, error) {
|
||||
records, err := d.store.ListMachines(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
machines := make([]contracthost.Machine, 0, len(records))
|
||||
for _, record := range records {
|
||||
reconciled, err := d.reconcileMachine(ctx, record.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
machines = append(machines, machineToContract(*reconciled))
|
||||
}
|
||||
return &contracthost.ListMachinesResponse{Machines: machines}, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) StopMachine(ctx context.Context, id contracthost.MachineID) error {
|
||||
unlock := d.lockMachine(id)
|
||||
defer unlock()
|
||||
|
||||
record, err := d.store.GetMachine(ctx, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if record.Phase == contracthost.MachinePhaseStopped {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := d.store.UpsertOperation(ctx, model.OperationRecord{
|
||||
MachineID: id,
|
||||
Type: model.MachineOperationStop,
|
||||
StartedAt: time.Now().UTC(),
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clearOperation := false
|
||||
defer func() {
|
||||
if clearOperation {
|
||||
_ = d.store.DeleteOperation(context.Background(), id)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := d.stopMachineRecord(ctx, record); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clearOperation = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) DeleteMachine(ctx context.Context, id contracthost.MachineID) error {
|
||||
unlock := d.lockMachine(id)
|
||||
defer unlock()
|
||||
|
||||
record, err := d.store.GetMachine(ctx, id)
|
||||
if err == store.ErrNotFound {
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := d.store.UpsertOperation(ctx, model.OperationRecord{
|
||||
MachineID: id,
|
||||
Type: model.MachineOperationDelete,
|
||||
StartedAt: time.Now().UTC(),
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clearOperation := false
|
||||
defer func() {
|
||||
if clearOperation {
|
||||
_ = d.store.DeleteOperation(context.Background(), id)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := d.deleteMachineRecord(ctx, record); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clearOperation = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) Reconcile(ctx context.Context) error {
|
||||
operations, err := d.store.ListOperations(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, operation := range operations {
|
||||
switch operation.Type {
|
||||
case model.MachineOperationCreate:
|
||||
if err := d.reconcileCreate(ctx, operation.MachineID); err != nil {
|
||||
return err
|
||||
}
|
||||
case model.MachineOperationStop:
|
||||
if err := d.reconcileStop(ctx, operation.MachineID); err != nil {
|
||||
return err
|
||||
}
|
||||
case model.MachineOperationDelete:
|
||||
if err := d.reconcileDelete(ctx, operation.MachineID); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported operation type %q", operation.Type)
|
||||
}
|
||||
}
|
||||
|
||||
records, err := d.store.ListMachines(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, record := range records {
|
||||
if _, err := d.reconcileMachine(ctx, record.ID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) listRunningNetworks(ctx context.Context, ignore contracthost.MachineID) ([]firecracker.NetworkAllocation, error) {
|
||||
records, err := d.store.ListMachines(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
networks := make([]firecracker.NetworkAllocation, 0, len(records))
|
||||
for _, record := range records {
|
||||
if record.ID == ignore || record.Phase != contracthost.MachinePhaseRunning {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(record.RuntimeHost) == "" || strings.TrimSpace(record.TapDevice) == "" {
|
||||
continue
|
||||
}
|
||||
network, err := firecracker.AllocationFromGuestIP(record.RuntimeHost, record.TapDevice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
networks = append(networks, network)
|
||||
}
|
||||
return networks, nil
|
||||
}
|
||||
|
||||
// reconcileCreate replays a journaled create operation after a restart.
//
// If the machine record exists, the create progressed far enough for the
// machine to be treated normally: reconcile it against the runtime and
// drop the journal entry. If the record is missing, the create died
// before the store write, so every partially-created artifact is removed.
func (d *Daemon) reconcileCreate(ctx context.Context, machineID contracthost.MachineID) error {
	_, err := d.store.GetMachine(ctx, machineID)
	if err == nil {
		// Record exists: the machine survived; just resync its phase.
		if _, err := d.reconcileMachine(ctx, machineID); err != nil {
			return err
		}
		return d.store.DeleteOperation(ctx, machineID)
	}
	if err != store.ErrNotFound {
		return err
	}

	// No record: roll back partial creation artifacts in order — volume
	// file, volume row, attachments, then the directories.
	if err := os.Remove(d.systemVolumePath(machineID)); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("cleanup system volume for %q: %w", machineID, err)
	}
	if err := d.store.DeleteVolume(ctx, d.systemVolumeID(machineID)); err != nil && err != store.ErrNotFound {
		return err
	}
	if err := d.detachVolumesForMachine(ctx, machineID); err != nil {
		return err
	}
	// Best-effort: the disk directory may already be gone.
	_ = os.RemoveAll(filepath.Dir(d.systemVolumePath(machineID)))
	if err := os.RemoveAll(d.machineRuntimeBaseDir(machineID)); err != nil {
		return fmt.Errorf("cleanup runtime dir for %q: %w", machineID, err)
	}
	return d.store.DeleteOperation(ctx, machineID)
}
|
||||
|
||||
func (d *Daemon) reconcileStop(ctx context.Context, machineID contracthost.MachineID) error {
|
||||
record, err := d.store.GetMachine(ctx, machineID)
|
||||
if err == store.ErrNotFound {
|
||||
return d.store.DeleteOperation(ctx, machineID)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := d.stopMachineRecord(ctx, record); err != nil {
|
||||
return err
|
||||
}
|
||||
return d.store.DeleteOperation(ctx, machineID)
|
||||
}
|
||||
|
||||
// reconcileDelete replays a journaled delete operation. When the machine
// record is already gone, only on-disk artifacts and volume rows can
// remain, so they are cleaned up directly (mirroring the rollback path of
// reconcileCreate); otherwise the full delete is re-executed.
func (d *Daemon) reconcileDelete(ctx context.Context, machineID contracthost.MachineID) error {
	record, err := d.store.GetMachine(ctx, machineID)
	if err == store.ErrNotFound {
		// Record already deleted: scrub stray files and volume state.
		if err := os.Remove(d.systemVolumePath(machineID)); err != nil && !os.IsNotExist(err) {
			return err
		}
		if err := d.store.DeleteVolume(ctx, d.systemVolumeID(machineID)); err != nil && err != store.ErrNotFound {
			return err
		}
		if err := d.detachVolumesForMachine(ctx, machineID); err != nil {
			return err
		}
		// Best-effort directory removal; failures here are non-fatal.
		_ = os.RemoveAll(filepath.Dir(d.systemVolumePath(machineID)))
		_ = os.RemoveAll(d.machineRuntimeBaseDir(machineID))
		return d.store.DeleteOperation(ctx, machineID)
	}
	if err != nil {
		return err
	}
	if err := d.deleteMachineRecord(ctx, record); err != nil {
		return err
	}
	return d.store.DeleteOperation(ctx, machineID)
}
|
||||
|
||||
// reconcileMachine resyncs a machine's stored phase with the actual
// firecracker runtime state. Machines not marked running are returned
// unchanged; a running record whose VM has disappeared is torn down and
// persisted as failed.
func (d *Daemon) reconcileMachine(ctx context.Context, machineID contracthost.MachineID) (*model.MachineRecord, error) {
	unlock := d.lockMachine(machineID)
	defer unlock()

	record, err := d.store.GetMachine(ctx, machineID)
	if err != nil {
		return nil, err
	}
	// Only running machines can drift from their stored state.
	if record.Phase != contracthost.MachinePhaseRunning {
		return record, nil
	}

	state, err := d.runtime.Inspect(machineToRuntimeState(*record))
	if err != nil {
		return nil, err
	}
	// Still alive: the stored record is accurate.
	if state.Phase == firecracker.PhaseRunning {
		return record, nil
	}

	// The VM is no longer running: release its runtime resources and
	// record the failure, clearing every field that only applies to a
	// live VM.
	if err := d.runtime.Delete(ctx, *state); err != nil {
		return nil, err
	}
	record.Phase = contracthost.MachinePhaseFailed
	record.Error = state.Error
	record.PID = 0
	record.SocketPath = ""
	record.RuntimeHost = ""
	record.TapDevice = ""
	record.StartedAt = nil
	if err := d.store.UpdateMachine(ctx, *record); err != nil {
		return nil, err
	}
	return record, nil
}
|
||||
|
||||
// deleteMachineRecord tears down a machine completely: runtime VM, volume
// attachments, the system volume file and its directory, and finally the
// store records. Ordering matters: the runtime goes first so nothing
// still holds the disk, and store records go last so a crash mid-way
// leaves enough state for a later replay.
func (d *Daemon) deleteMachineRecord(ctx context.Context, record *model.MachineRecord) error {
	if err := d.runtime.Delete(ctx, machineToRuntimeState(*record)); err != nil {
		return err
	}
	if err := d.detachVolumesForMachine(ctx, record.ID); err != nil {
		return err
	}

	systemVolume, err := d.store.GetVolume(ctx, record.SystemVolumeID)
	if err != nil {
		return err
	}
	// Remove the backing file, then its per-machine directory.
	if err := os.Remove(systemVolume.Path); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("remove system volume %q: %w", systemVolume.Path, err)
	}
	if err := os.RemoveAll(filepath.Dir(systemVolume.Path)); err != nil {
		return fmt.Errorf("remove machine disk dir %q: %w", filepath.Dir(systemVolume.Path), err)
	}
	if err := d.store.DeleteVolume(ctx, record.SystemVolumeID); err != nil {
		return err
	}
	return d.store.DeleteMachine(ctx, record.ID)
}
|
||||
|
||||
func (d *Daemon) stopMachineRecord(ctx context.Context, record *model.MachineRecord) error {
|
||||
if err := d.runtime.Delete(ctx, machineToRuntimeState(*record)); err != nil {
|
||||
return err
|
||||
}
|
||||
record.Phase = contracthost.MachinePhaseStopped
|
||||
record.Error = ""
|
||||
record.PID = 0
|
||||
record.SocketPath = ""
|
||||
record.RuntimeHost = ""
|
||||
record.TapDevice = ""
|
||||
record.StartedAt = nil
|
||||
return d.store.UpdateMachine(ctx, *record)
|
||||
}
|
||||
|
||||
func (d *Daemon) detachVolumesForMachine(ctx context.Context, machineID contracthost.MachineID) error {
|
||||
volumes, err := d.store.ListVolumes(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, volume := range volumes {
|
||||
if volume.AttachedMachineID == nil || *volume.AttachedMachineID != machineID {
|
||||
continue
|
||||
}
|
||||
volume.AttachedMachineID = nil
|
||||
if err := d.store.UpdateVolume(ctx, volume); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue