mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 05:02:05 +00:00
feat: vsock mmds snapshot
This commit is contained in:
parent
39f8882c30
commit
07975fb459
13 changed files with 390 additions and 148 deletions
|
|
@ -12,10 +12,10 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
"github.com/getcompanion-ai/computer-host/internal/firecracker"
|
||||
"github.com/getcompanion-ai/computer-host/internal/model"
|
||||
"github.com/getcompanion-ai/computer-host/internal/store"
|
||||
contracthost "github.com/getcompanion-ai/computer-host/contract"
|
||||
)
|
||||
|
||||
func (d *Daemon) CreateSnapshot(ctx context.Context, machineID contracthost.MachineID, req contracthost.CreateSnapshotRequest) (*contracthost.CreateSnapshotResponse, error) {
|
||||
|
|
@ -332,6 +332,9 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
|
|||
restoredDrivePaths[driveID] = volumePath
|
||||
}
|
||||
|
||||
// Do not force vsock_override on restore: Firecracker rejects it for old
|
||||
// snapshots without a vsock device, and the jailed /run path already
|
||||
// relocates safely for snapshots created with the new vsock-backed guest.
|
||||
loadSpec := firecracker.SnapshotLoadSpec{
|
||||
ID: firecracker.MachineID(req.MachineID),
|
||||
SnapshotPath: vmstateArtifact.LocalPath,
|
||||
|
|
@ -349,27 +352,6 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
|
|||
return nil, fmt.Errorf("restore boot: %w", err)
|
||||
}
|
||||
|
||||
// Wait for guest to become ready
|
||||
if err := waitForGuestReady(ctx, machineState.RuntimeHost, defaultMachinePorts()); err != nil {
|
||||
_ = d.runtime.Delete(ctx, *machineState)
|
||||
_ = os.RemoveAll(filepath.Dir(newSystemDiskPath))
|
||||
clearOperation = true
|
||||
return nil, fmt.Errorf("wait for restored guest ready: %w", err)
|
||||
}
|
||||
if err := d.reconfigureGuestIdentity(ctx, machineState.RuntimeHost, req.MachineID, guestConfig); err != nil {
|
||||
_ = d.runtime.Delete(ctx, *machineState)
|
||||
_ = os.RemoveAll(filepath.Dir(newSystemDiskPath))
|
||||
clearOperation = true
|
||||
return nil, fmt.Errorf("reconfigure restored guest identity: %w", err)
|
||||
}
|
||||
guestSSHPublicKey, err := d.readGuestSSHPublicKey(ctx, machineState.RuntimeHost)
|
||||
if err != nil {
|
||||
_ = d.runtime.Delete(ctx, *machineState)
|
||||
_ = os.RemoveAll(filepath.Dir(newSystemDiskPath))
|
||||
clearOperation = true
|
||||
return nil, fmt.Errorf("read restored guest ssh host key: %w", err)
|
||||
}
|
||||
|
||||
systemVolumeID := d.systemVolumeID(req.MachineID)
|
||||
now := time.Now().UTC()
|
||||
|
||||
|
|
@ -419,38 +401,13 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
|
|||
RuntimeHost: machineState.RuntimeHost,
|
||||
TapDevice: machineState.TapName,
|
||||
Ports: defaultMachinePorts(),
|
||||
GuestSSHPublicKey: guestSSHPublicKey,
|
||||
Phase: contracthost.MachinePhaseRunning,
|
||||
GuestSSHPublicKey: "",
|
||||
Phase: contracthost.MachinePhaseStarting,
|
||||
PID: machineState.PID,
|
||||
SocketPath: machineState.SocketPath,
|
||||
CreatedAt: now,
|
||||
StartedAt: machineState.StartedAt,
|
||||
}
|
||||
d.relayAllocMu.Lock()
|
||||
sshRelayPort, err := d.allocateMachineRelayProxy(ctx, machineRecord, contracthost.MachinePortNameSSH, machineRecord.RuntimeHost, defaultSSHPort, minMachineSSHRelayPort, maxMachineSSHRelayPort)
|
||||
var vncRelayPort uint16
|
||||
if err == nil {
|
||||
vncRelayPort, err = d.allocateMachineRelayProxy(ctx, machineRecord, contracthost.MachinePortNameVNC, machineRecord.RuntimeHost, defaultVNCPort, minMachineVNCRelayPort, maxMachineVNCRelayPort)
|
||||
}
|
||||
d.relayAllocMu.Unlock()
|
||||
if err != nil {
|
||||
d.stopMachineRelays(machineRecord.ID)
|
||||
for _, restoredVolumeID := range restoredUserVolumeIDs {
|
||||
_ = d.store.DeleteVolume(context.Background(), restoredVolumeID)
|
||||
}
|
||||
_ = d.store.DeleteVolume(context.Background(), systemVolumeID)
|
||||
_ = d.runtime.Delete(ctx, *machineState)
|
||||
_ = os.RemoveAll(filepath.Dir(newSystemDiskPath))
|
||||
clearOperation = true
|
||||
return nil, fmt.Errorf("allocate relay ports for restored machine: %w", err)
|
||||
}
|
||||
machineRecord.Ports = buildMachinePorts(sshRelayPort, vncRelayPort)
|
||||
startedRelays := true
|
||||
defer func() {
|
||||
if startedRelays {
|
||||
d.stopMachineRelays(machineRecord.ID)
|
||||
}
|
||||
}()
|
||||
if err := d.store.CreateMachine(ctx, machineRecord); err != nil {
|
||||
for _, restoredVolumeID := range restoredUserVolumeIDs {
|
||||
_ = d.store.DeleteVolume(context.Background(), restoredVolumeID)
|
||||
|
|
@ -462,7 +419,6 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
|
|||
return nil, err
|
||||
}
|
||||
|
||||
startedRelays = false
|
||||
clearOperation = true
|
||||
return &contracthost.RestoreSnapshotResponse{
|
||||
Machine: machineToContract(machineRecord),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue