mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 01:00:27 +00:00
feat: power-cycle
This commit is contained in:
parent
0e5e989192
commit
d0f0530ca2
7 changed files with 242 additions and 18 deletions
|
|
@ -6,6 +6,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||||
)
|
)
|
||||||
|
|
@ -27,6 +28,7 @@ type Config struct {
|
||||||
SocketPath string
|
SocketPath string
|
||||||
HTTPAddr string
|
HTTPAddr string
|
||||||
EgressInterface string
|
EgressInterface string
|
||||||
|
ReconcileInterval time.Duration
|
||||||
FirecrackerBinaryPath string
|
FirecrackerBinaryPath string
|
||||||
JailerBinaryPath string
|
JailerBinaryPath string
|
||||||
GuestLoginCAPublicKey string
|
GuestLoginCAPublicKey string
|
||||||
|
|
@ -67,6 +69,10 @@ func Load() (Config, error) {
|
||||||
JailerBinaryPath: strings.TrimSpace(os.Getenv("JAILER_BINARY_PATH")),
|
JailerBinaryPath: strings.TrimSpace(os.Getenv("JAILER_BINARY_PATH")),
|
||||||
GuestLoginCAPublicKey: strings.TrimSpace(os.Getenv("GUEST_LOGIN_CA_PUBLIC_KEY")),
|
GuestLoginCAPublicKey: strings.TrimSpace(os.Getenv("GUEST_LOGIN_CA_PUBLIC_KEY")),
|
||||||
}
|
}
|
||||||
|
cfg.ReconcileInterval, err = durationDefault("FIRECRACKER_HOST_RECONCILE_INTERVAL", 5*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
if err := cfg.Validate(); err != nil {
|
if err := cfg.Validate(); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
|
|
@ -114,6 +120,9 @@ func (c Config) Validate() error {
|
||||||
if strings.TrimSpace(c.EgressInterface) == "" {
|
if strings.TrimSpace(c.EgressInterface) == "" {
|
||||||
return fmt.Errorf("FIRECRACKER_HOST_EGRESS_INTERFACE is required")
|
return fmt.Errorf("FIRECRACKER_HOST_EGRESS_INTERFACE is required")
|
||||||
}
|
}
|
||||||
|
if c.ReconcileInterval <= 0 {
|
||||||
|
return fmt.Errorf("FIRECRACKER_HOST_RECONCILE_INTERVAL must be greater than zero")
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -174,3 +183,15 @@ func loadBool(name string) (bool, error) {
|
||||||
}
|
}
|
||||||
return parsed, nil
|
return parsed, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func durationDefault(name string, fallback time.Duration) (time.Duration, error) {
|
||||||
|
value := strings.TrimSpace(os.Getenv(name))
|
||||||
|
if value == "" {
|
||||||
|
return fallback, nil
|
||||||
|
}
|
||||||
|
parsed, err := time.ParseDuration(value)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("%s must be a duration: %w", name, err)
|
||||||
|
}
|
||||||
|
return parsed, nil
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,12 @@ func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachi
|
||||||
if err := os.Truncate(systemVolumePath, defaultGuestDiskSizeBytes); err != nil {
|
if err := os.Truncate(systemVolumePath, defaultGuestDiskSizeBytes); err != nil {
|
||||||
return nil, fmt.Errorf("expand system volume for %q: %w", req.MachineID, err)
|
return nil, fmt.Errorf("expand system volume for %q: %w", req.MachineID, err)
|
||||||
}
|
}
|
||||||
|
if err := injectMachineIdentity(ctx, systemVolumePath, req.MachineID); err != nil {
|
||||||
|
return nil, fmt.Errorf("inject machine identity for %q: %w", req.MachineID, err)
|
||||||
|
}
|
||||||
|
if err := injectGuestConfig(ctx, systemVolumePath, guestConfig); err != nil {
|
||||||
|
return nil, fmt.Errorf("inject guest config for %q: %w", req.MachineID, err)
|
||||||
|
}
|
||||||
removeSystemVolumeOnFailure := true
|
removeSystemVolumeOnFailure := true
|
||||||
defer func() {
|
defer func() {
|
||||||
if !removeSystemVolumeOnFailure {
|
if !removeSystemVolumeOnFailure {
|
||||||
|
|
|
||||||
|
|
@ -236,6 +236,23 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
|
||||||
if machine.GuestConfig == nil || len(machine.GuestConfig.AuthorizedKeys) == 0 {
|
if machine.GuestConfig == nil || len(machine.GuestConfig.AuthorizedKeys) == 0 {
|
||||||
t.Fatalf("stored guest config missing authorized keys: %#v", machine.GuestConfig)
|
t.Fatalf("stored guest config missing authorized keys: %#v", machine.GuestConfig)
|
||||||
}
|
}
|
||||||
|
machineName, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/microagent/machine-name")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read injected machine-name: %v", err)
|
||||||
|
}
|
||||||
|
if machineName != "vm-1\n" {
|
||||||
|
t.Fatalf("machine-name mismatch: got %q want %q", machineName, "vm-1\n")
|
||||||
|
}
|
||||||
|
injectedAuthorizedKeys, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/microagent/authorized_keys")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read injected authorized_keys: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(injectedAuthorizedKeys, strings.TrimSpace(string(hostAuthorizedKeyBytes))) {
|
||||||
|
t.Fatalf("disk authorized_keys missing backend ssh key: %q", injectedAuthorizedKeys)
|
||||||
|
}
|
||||||
|
if !strings.Contains(injectedAuthorizedKeys, "daemon-test") {
|
||||||
|
t.Fatalf("disk authorized_keys missing request override key: %q", injectedAuthorizedKeys)
|
||||||
|
}
|
||||||
|
|
||||||
operations, err := fileStore.ListOperations(context.Background())
|
operations, err := fileStore.ListOperations(context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -260,6 +277,11 @@ func TestStopMachineSyncsGuestFilesystemBeforeDelete(t *testing.T) {
|
||||||
t.Fatalf("create daemon: %v", err)
|
t.Fatalf("create daemon: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var syncedHost string
|
||||||
|
hostDaemon.syncGuestFilesystem = func(_ context.Context, runtimeHost string) error {
|
||||||
|
syncedHost = runtimeHost
|
||||||
|
return nil
|
||||||
|
}
|
||||||
var shutdownHost string
|
var shutdownHost string
|
||||||
hostDaemon.shutdownGuest = func(_ context.Context, runtimeHost string) error {
|
hostDaemon.shutdownGuest = func(_ context.Context, runtimeHost string) error {
|
||||||
shutdownHost = runtimeHost
|
shutdownHost = runtimeHost
|
||||||
|
|
@ -295,6 +317,9 @@ func TestStopMachineSyncsGuestFilesystemBeforeDelete(t *testing.T) {
|
||||||
if shutdownHost != "172.16.0.2" {
|
if shutdownHost != "172.16.0.2" {
|
||||||
t.Fatalf("shutdown host mismatch: got %q want %q", shutdownHost, "172.16.0.2")
|
t.Fatalf("shutdown host mismatch: got %q want %q", shutdownHost, "172.16.0.2")
|
||||||
}
|
}
|
||||||
|
if syncedHost != "172.16.0.2" {
|
||||||
|
t.Fatalf("sync host mismatch: got %q want %q", syncedHost, "172.16.0.2")
|
||||||
|
}
|
||||||
// runtime.Delete is always called to clean up TAP device and runtime dir.
|
// runtime.Delete is always called to clean up TAP device and runtime dir.
|
||||||
if len(runtime.deleteCalls) != 1 {
|
if len(runtime.deleteCalls) != 1 {
|
||||||
t.Fatalf("runtime delete call count mismatch: got %d want 1", len(runtime.deleteCalls))
|
t.Fatalf("runtime delete call count mismatch: got %d want 1", len(runtime.deleteCalls))
|
||||||
|
|
@ -361,6 +386,10 @@ func TestReconcileStartingMachinePersonalizesBeforeRunning(t *testing.T) {
|
||||||
t.Fatalf("create machine: %v", err)
|
t.Fatalf("create machine: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := hostDaemon.Reconcile(context.Background()); err != nil {
|
||||||
|
t.Fatalf("Reconcile returned error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
response, err := hostDaemon.GetMachine(context.Background(), "vm-starting")
|
response, err := hostDaemon.GetMachine(context.Background(), "vm-starting")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("GetMachine returned error: %v", err)
|
t.Fatalf("GetMachine returned error: %v", err)
|
||||||
|
|
@ -376,6 +405,116 @@ func TestReconcileStartingMachinePersonalizesBeforeRunning(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestListMachinesDoesNotReconcileStartingMachines(t *testing.T) {
|
||||||
|
root := t.TempDir()
|
||||||
|
cfg := testConfig(root)
|
||||||
|
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create file store: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
hostDaemon, err := New(cfg, fileStore, &fakeRuntime{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create daemon: %v", err)
|
||||||
|
}
|
||||||
|
hostDaemon.personalizeGuest = func(context.Context, *model.MachineRecord, firecracker.MachineState) error {
|
||||||
|
t.Fatalf("ListMachines should not reconcile guest personalization")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
hostDaemon.readGuestSSHPublicKey = func(context.Context, string) (string, error) {
|
||||||
|
t.Fatalf("ListMachines should not read guest ssh public key")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now().UTC()
|
||||||
|
if err := fileStore.CreateMachine(context.Background(), model.MachineRecord{
|
||||||
|
ID: "vm-list",
|
||||||
|
SystemVolumeID: "vm-list-system",
|
||||||
|
RuntimeHost: "127.0.0.1",
|
||||||
|
TapDevice: "fctap-list",
|
||||||
|
Ports: defaultMachinePorts(),
|
||||||
|
Phase: contracthost.MachinePhaseStarting,
|
||||||
|
PID: 4321,
|
||||||
|
SocketPath: filepath.Join(cfg.RuntimeDir, "machines", "vm-list", "root", "run", "firecracker.sock"),
|
||||||
|
CreatedAt: now,
|
||||||
|
StartedAt: &now,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("create machine: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
response, err := hostDaemon.ListMachines(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListMachines returned error: %v", err)
|
||||||
|
}
|
||||||
|
if len(response.Machines) != 1 {
|
||||||
|
t.Fatalf("machine count = %d, want 1", len(response.Machines))
|
||||||
|
}
|
||||||
|
if response.Machines[0].Phase != contracthost.MachinePhaseStarting {
|
||||||
|
t.Fatalf("machine phase = %q, want %q", response.Machines[0].Phase, contracthost.MachinePhaseStarting)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReconcileStartingMachineIgnoresPersonalizationFailures(t *testing.T) {
|
||||||
|
root := t.TempDir()
|
||||||
|
cfg := testConfig(root)
|
||||||
|
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create file store: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sshListener := listenTestPort(t, int(defaultSSHPort))
|
||||||
|
defer func() { _ = sshListener.Close() }()
|
||||||
|
vncListener := listenTestPort(t, int(defaultVNCPort))
|
||||||
|
defer func() { _ = vncListener.Close() }()
|
||||||
|
|
||||||
|
startedAt := time.Unix(1700000201, 0).UTC()
|
||||||
|
runtime := &fakeRuntime{}
|
||||||
|
hostDaemon, err := New(cfg, fileStore, runtime)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create daemon: %v", err)
|
||||||
|
}
|
||||||
|
hostDaemon.personalizeGuest = func(context.Context, *model.MachineRecord, firecracker.MachineState) error {
|
||||||
|
return errors.New("vsock EOF")
|
||||||
|
}
|
||||||
|
hostDaemon.readGuestSSHPublicKey = func(context.Context, string) (string, error) {
|
||||||
|
return "", errors.New("Permission denied")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := fileStore.CreateMachine(context.Background(), model.MachineRecord{
|
||||||
|
ID: "vm-best-effort",
|
||||||
|
SystemVolumeID: "vm-best-effort-system",
|
||||||
|
RuntimeHost: "127.0.0.1",
|
||||||
|
TapDevice: "fctap-best-effort",
|
||||||
|
Ports: defaultMachinePorts(),
|
||||||
|
GuestSSHPublicKey: "ssh-ed25519 AAAAExistingHostKey",
|
||||||
|
Phase: contracthost.MachinePhaseStarting,
|
||||||
|
PID: 4322,
|
||||||
|
SocketPath: filepath.Join(cfg.RuntimeDir, "machines", "vm-best-effort", "root", "run", "firecracker.sock"),
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
StartedAt: &startedAt,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("create machine: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := hostDaemon.Reconcile(context.Background()); err != nil {
|
||||||
|
t.Fatalf("Reconcile returned error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
record, err := fileStore.GetMachine(context.Background(), "vm-best-effort")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get machine: %v", err)
|
||||||
|
}
|
||||||
|
if record.Phase != contracthost.MachinePhaseRunning {
|
||||||
|
t.Fatalf("machine phase = %q, want %q", record.Phase, contracthost.MachinePhaseRunning)
|
||||||
|
}
|
||||||
|
if record.GuestSSHPublicKey != "ssh-ed25519 AAAAExistingHostKey" {
|
||||||
|
t.Fatalf("guest ssh public key = %q, want preserved value", record.GuestSSHPublicKey)
|
||||||
|
}
|
||||||
|
if len(runtime.deleteCalls) != 0 {
|
||||||
|
t.Fatalf("runtime delete calls = %d, want 0", len(runtime.deleteCalls))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNewEnsuresBackendSSHKeyPair(t *testing.T) {
|
func TestNewEnsuresBackendSSHKeyPair(t *testing.T) {
|
||||||
root := t.TempDir()
|
root := t.TempDir()
|
||||||
cfg := testConfig(root)
|
cfg := testConfig(root)
|
||||||
|
|
@ -436,7 +575,7 @@ func TestRestoreSnapshotFallsBackToLocalSnapshotNetwork(t *testing.T) {
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": buildTestExt4ImageBytes(t),
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|
@ -582,7 +721,7 @@ func TestRestoreSnapshotUsesLocalSnapshotArtifacts(t *testing.T) {
|
||||||
t.Fatalf("create snapshot dir: %v", err)
|
t.Fatalf("create snapshot dir: %v", err)
|
||||||
}
|
}
|
||||||
systemPath := filepath.Join(snapshotDir, "system.img")
|
systemPath := filepath.Join(snapshotDir, "system.img")
|
||||||
if err := os.WriteFile(systemPath, []byte("disk"), 0o644); err != nil {
|
if err := os.WriteFile(systemPath, buildTestExt4ImageBytes(t), 0o644); err != nil {
|
||||||
t.Fatalf("write system disk: %v", err)
|
t.Fatalf("write system disk: %v", err)
|
||||||
}
|
}
|
||||||
if err := fileStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
|
if err := fileStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
|
||||||
|
|
@ -620,6 +759,13 @@ func TestRestoreSnapshotUsesLocalSnapshotArtifacts(t *testing.T) {
|
||||||
if runtime.restoreCalls != 0 {
|
if runtime.restoreCalls != 0 {
|
||||||
t.Fatalf("restore boot call count mismatch: got %d want 0", runtime.restoreCalls)
|
t.Fatalf("restore boot call count mismatch: got %d want 0", runtime.restoreCalls)
|
||||||
}
|
}
|
||||||
|
machineName, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/microagent/machine-name")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read restored machine-name: %v", err)
|
||||||
|
}
|
||||||
|
if machineName != "restored-local\n" {
|
||||||
|
t.Fatalf("restored machine-name mismatch: got %q want %q", machineName, "restored-local\n")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetSnapshotArtifactReturnsLocalArtifactPath(t *testing.T) {
|
func TestGetSnapshotArtifactReturnsLocalArtifactPath(t *testing.T) {
|
||||||
|
|
@ -713,7 +859,7 @@ func TestRestoreSnapshotUsesDurableSnapshotSpec(t *testing.T) {
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": buildTestExt4ImageBytes(t),
|
||||||
"/user-0": []byte("user-disk"),
|
"/user-0": []byte("user-disk"),
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
@ -820,7 +966,7 @@ func TestRestoreSnapshotBootsWithFreshNetworkWhenSourceNetworkInUseOnHost(t *tes
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": buildTestExt4ImageBytes(t),
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|
@ -946,11 +1092,27 @@ func testConfig(root string) appconfig.Config {
|
||||||
DriveIOEngine: firecracker.DriveIOEngineSync,
|
DriveIOEngine: firecracker.DriveIOEngineSync,
|
||||||
SocketPath: filepath.Join(root, "firecracker-host.sock"),
|
SocketPath: filepath.Join(root, "firecracker-host.sock"),
|
||||||
EgressInterface: "eth0",
|
EgressInterface: "eth0",
|
||||||
|
ReconcileInterval: time.Second,
|
||||||
FirecrackerBinaryPath: "/usr/bin/firecracker",
|
FirecrackerBinaryPath: "/usr/bin/firecracker",
|
||||||
JailerBinaryPath: "/usr/bin/jailer",
|
JailerBinaryPath: "/usr/bin/jailer",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildTestExt4ImageBytes(t *testing.T) []byte {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
root := t.TempDir()
|
||||||
|
imagePath := filepath.Join(root, "rootfs.ext4")
|
||||||
|
if err := buildTestExt4Image(root, imagePath); err != nil {
|
||||||
|
t.Fatalf("build ext4 image: %v", err)
|
||||||
|
}
|
||||||
|
payload, err := os.ReadFile(imagePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read ext4 image: %v", err)
|
||||||
|
}
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
|
||||||
func TestGuestKernelArgsDisablesPCIByDefault(t *testing.T) {
|
func TestGuestKernelArgsDisablesPCIByDefault(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func (d *Daemon) GetMachine(ctx context.Context, id contracthost.MachineID) (*contracthost.GetMachineResponse, error) {
|
func (d *Daemon) GetMachine(ctx context.Context, id contracthost.MachineID) (*contracthost.GetMachineResponse, error) {
|
||||||
record, err := d.reconcileMachine(ctx, id)
|
record, err := d.store.GetMachine(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
@ -32,11 +32,7 @@ func (d *Daemon) ListMachines(ctx context.Context) (*contracthost.ListMachinesRe
|
||||||
|
|
||||||
machines := make([]contracthost.Machine, 0, len(records))
|
machines := make([]contracthost.Machine, 0, len(records))
|
||||||
for _, record := range records {
|
for _, record := range records {
|
||||||
reconciled, err := d.reconcileMachine(ctx, record.ID)
|
machines = append(machines, machineToContract(record))
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
machines = append(machines, machineToContract(*reconciled))
|
|
||||||
}
|
}
|
||||||
return &contracthost.ListMachinesResponse{Machines: machines}, nil
|
return &contracthost.ListMachinesResponse{Machines: machines}, nil
|
||||||
}
|
}
|
||||||
|
|
@ -391,11 +387,12 @@ func (d *Daemon) reconcileMachine(ctx context.Context, machineID contracthost.Ma
|
||||||
return record, nil
|
return record, nil
|
||||||
}
|
}
|
||||||
if err := d.personalizeGuest(ctx, record, *state); err != nil {
|
if err := d.personalizeGuest(ctx, record, *state); err != nil {
|
||||||
return d.failMachineStartup(ctx, record, err.Error())
|
fmt.Fprintf(os.Stderr, "warning: guest personalization for %q failed: %v\n", record.ID, err)
|
||||||
}
|
}
|
||||||
guestSSHPublicKey, err := d.readGuestSSHPublicKey(ctx, state.RuntimeHost)
|
guestSSHPublicKey, err := d.readGuestSSHPublicKey(ctx, state.RuntimeHost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return d.failMachineStartup(ctx, record, err.Error())
|
fmt.Fprintf(os.Stderr, "warning: read guest ssh public key for %q failed: %v\n", record.ID, err)
|
||||||
|
guestSSHPublicKey = record.GuestSSHPublicKey
|
||||||
}
|
}
|
||||||
record.RuntimeHost = state.RuntimeHost
|
record.RuntimeHost = state.RuntimeHost
|
||||||
record.TapDevice = state.TapName
|
record.TapDevice = state.TapName
|
||||||
|
|
@ -501,6 +498,9 @@ func (d *Daemon) stopMachineRecord(ctx context.Context, record *model.MachineRec
|
||||||
d.stopPublishedPortsForMachine(record.ID)
|
d.stopPublishedPortsForMachine(record.ID)
|
||||||
|
|
||||||
if record.Phase == contracthost.MachinePhaseRunning && strings.TrimSpace(record.RuntimeHost) != "" {
|
if record.Phase == contracthost.MachinePhaseRunning && strings.TrimSpace(record.RuntimeHost) != "" {
|
||||||
|
if err := d.syncGuestFilesystem(ctx, record.RuntimeHost); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "warning: guest filesystem sync for %q failed: %v\n", record.ID, err)
|
||||||
|
}
|
||||||
d.shutdownGuestClean(ctx, record)
|
d.shutdownGuestClean(ctx, record)
|
||||||
}
|
}
|
||||||
// Always call runtime.Delete: it cleans up the TAP device, runtime
|
// Always call runtime.Delete: it cleans up the TAP device, runtime
|
||||||
|
|
|
||||||
|
|
@ -468,8 +468,9 @@ func TestRestoreSnapshotTransitionsToStartingWithoutRelayAllocation(t *testing.T
|
||||||
if err := os.MkdirAll(snapDir, 0o755); err != nil {
|
if err := os.MkdirAll(snapDir, 0o755); err != nil {
|
||||||
t.Fatalf("create snapshot dir: %v", err)
|
t.Fatalf("create snapshot dir: %v", err)
|
||||||
}
|
}
|
||||||
|
systemDisk := buildTestExt4ImageBytes(t)
|
||||||
snapDisk := filepath.Join(snapDir, "system.img")
|
snapDisk := filepath.Join(snapDir, "system.img")
|
||||||
if err := os.WriteFile(snapDisk, []byte("disk"), 0o644); err != nil {
|
if err := os.WriteFile(snapDisk, systemDisk, 0o644); err != nil {
|
||||||
t.Fatalf("write snapshot disk: %v", err)
|
t.Fatalf("write snapshot disk: %v", err)
|
||||||
}
|
}
|
||||||
if err := baseStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
|
if err := baseStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
|
||||||
|
|
@ -487,7 +488,7 @@ func TestRestoreSnapshotTransitionsToStartingWithoutRelayAllocation(t *testing.T
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": systemDisk,
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|
@ -594,8 +595,16 @@ func TestStopMachineContinuesWhenGuestSyncFails(t *testing.T) {
|
||||||
t.Fatalf("create daemon: %v", err)
|
t.Fatalf("create daemon: %v", err)
|
||||||
}
|
}
|
||||||
stubGuestSSHPublicKeyReader(hostDaemon)
|
stubGuestSSHPublicKeyReader(hostDaemon)
|
||||||
|
hostDaemon.syncGuestFilesystem = func(context.Context, string) error {
|
||||||
|
return errors.New("guest sync failed")
|
||||||
|
}
|
||||||
hostDaemon.shutdownGuest = func(context.Context, string) error {
|
hostDaemon.shutdownGuest = func(context.Context, string) error {
|
||||||
return errors.New("guest shutdown failed")
|
runtime.inspectOverride = func(state firecracker.MachineState) (*firecracker.MachineState, error) {
|
||||||
|
state.Phase = firecracker.PhaseStopped
|
||||||
|
state.PID = 0
|
||||||
|
return &state, nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
|
|
@ -678,11 +687,12 @@ func TestRestoreSnapshotCleansStagingArtifactsAfterSuccess(t *testing.T) {
|
||||||
}
|
}
|
||||||
stubGuestSSHPublicKeyReader(hostDaemon)
|
stubGuestSSHPublicKeyReader(hostDaemon)
|
||||||
hostDaemon.reconfigureGuestIdentity = func(context.Context, string, contracthost.MachineID, *contracthost.GuestConfig) error { return nil }
|
hostDaemon.reconfigureGuestIdentity = func(context.Context, string, contracthost.MachineID, *contracthost.GuestConfig) error { return nil }
|
||||||
|
systemDisk := buildTestExt4ImageBytes(t)
|
||||||
|
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": systemDisk,
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|
@ -699,7 +709,7 @@ func TestRestoreSnapshotCleansStagingArtifactsAfterSuccess(t *testing.T) {
|
||||||
SourceRuntimeHost: "172.16.0.2",
|
SourceRuntimeHost: "172.16.0.2",
|
||||||
SourceTapDevice: "fctap0",
|
SourceTapDevice: "fctap0",
|
||||||
Artifacts: []contracthost.SnapshotArtifact{
|
Artifacts: []contracthost.SnapshotArtifact{
|
||||||
{ID: "disk-system", Kind: contracthost.SnapshotArtifactKindDisk, Name: "system.img", DownloadURL: server.URL + "/system", SHA256Hex: mustSHA256Hex(t, []byte("disk"))},
|
{ID: "disk-system", Kind: contracthost.SnapshotArtifactKindDisk, Name: "system.img", DownloadURL: server.URL + "/system", SHA256Hex: mustSHA256Hex(t, systemDisk)},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
@ -727,11 +737,12 @@ func TestRestoreSnapshotCleansStagingArtifactsAfterDownloadFailure(t *testing.T)
|
||||||
t.Fatalf("create daemon: %v", err)
|
t.Fatalf("create daemon: %v", err)
|
||||||
}
|
}
|
||||||
stubGuestSSHPublicKeyReader(hostDaemon)
|
stubGuestSSHPublicKeyReader(hostDaemon)
|
||||||
|
systemDisk := buildTestExt4ImageBytes(t)
|
||||||
|
|
||||||
server := newRestoreArtifactServer(t, map[string][]byte{
|
server := newRestoreArtifactServer(t, map[string][]byte{
|
||||||
"/kernel": []byte("kernel"),
|
"/kernel": []byte("kernel"),
|
||||||
"/rootfs": []byte("rootfs"),
|
"/rootfs": []byte("rootfs"),
|
||||||
"/system": []byte("disk"),
|
"/system": systemDisk,
|
||||||
})
|
})
|
||||||
defer server.Close()
|
defer server.Close()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -265,6 +265,14 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
|
||||||
clearOperation = true
|
clearOperation = true
|
||||||
return nil, fmt.Errorf("copy system disk for restore: %w", err)
|
return nil, fmt.Errorf("copy system disk for restore: %w", err)
|
||||||
}
|
}
|
||||||
|
if err := injectMachineIdentity(ctx, newSystemDiskPath, req.MachineID); err != nil {
|
||||||
|
clearOperation = true
|
||||||
|
return nil, fmt.Errorf("inject machine identity for restore: %w", err)
|
||||||
|
}
|
||||||
|
if err := injectGuestConfig(ctx, newSystemDiskPath, guestConfig); err != nil {
|
||||||
|
clearOperation = true
|
||||||
|
return nil, fmt.Errorf("inject guest config for restore: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
type restoredUserVolume struct {
|
type restoredUserVolume struct {
|
||||||
ID contracthost.VolumeID
|
ID contracthost.VolumeID
|
||||||
|
|
|
||||||
16
main.go
16
main.go
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
|
|
||||||
|
|
@ -100,6 +101,21 @@ func main() {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
group.Go(func() error {
|
||||||
|
ticker := time.NewTicker(cfg.ReconcileInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-groupCtx.Done():
|
||||||
|
return nil
|
||||||
|
case <-ticker.C:
|
||||||
|
if err := hostDaemon.Reconcile(groupCtx); err != nil && groupCtx.Err() == nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "warning: firecracker-host reconcile failed: %v\n", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
if err := group.Wait(); err != nil {
|
if err := group.Wait(); err != nil {
|
||||||
exit(err)
|
exit(err)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue