mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 06:04:38 +00:00
host api alignment (#7)
* feat: add Firecracker API client methods for VM pause/resume and snapshots
Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).
* feat: add snapshot data layer - contract types, model, store, config
Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.
* feat: add runtime methods for VM pause, resume, snapshot, and restore
Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.
* feat: add daemon snapshot create, restore, and reconciliation logic
Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.
* feat: add HTTP endpoints for snapshot create, get, list, delete, restore
Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).
* fix: cross-device rename, restore network, and snapshot cleanup
- Replace os.Rename with copy+remove when moving snapshot files out of
/proc/<pid>/root/ (os.Rename fails with a cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
uses its own tap device instead of the source VM's
- Clean up partial snapshot dirs immediately on failure instead of only
during reconciliation
- Reject snapshot requests while a machine operation is already pending
* fix: test and modify snapshot runtime
* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
This commit is contained in:
parent
9382de7eba
commit
b5c97aef07
17 changed files with 1287 additions and 20 deletions
|
|
@ -128,6 +128,14 @@ func (d *Daemon) Reconcile(ctx context.Context) error {
|
|||
if err := d.reconcileDelete(ctx, operation.MachineID); err != nil {
|
||||
return err
|
||||
}
|
||||
case model.MachineOperationSnapshot:
|
||||
if err := d.reconcileSnapshot(ctx, operation); err != nil {
|
||||
return err
|
||||
}
|
||||
case model.MachineOperationRestore:
|
||||
if err := d.reconcileRestore(ctx, operation); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported operation type %q", operation.Type)
|
||||
}
|
||||
|
|
@ -325,3 +333,36 @@ func (d *Daemon) detachVolumesForMachine(ctx context.Context, machineID contract
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) reconcileSnapshot(ctx context.Context, operation model.OperationRecord) error {
|
||||
if operation.SnapshotID == nil {
|
||||
return d.store.DeleteOperation(ctx, operation.MachineID)
|
||||
}
|
||||
_, err := d.store.GetSnapshot(ctx, *operation.SnapshotID)
|
||||
if err == nil {
|
||||
// Snapshot completed successfully, just clear the journal
|
||||
return d.store.DeleteOperation(ctx, operation.MachineID)
|
||||
}
|
||||
// Snapshot did not complete: clean up partial snapshot directory and resume the machine
|
||||
snapshotDir := filepath.Join(d.config.SnapshotsDir, string(*operation.SnapshotID))
|
||||
_ = os.RemoveAll(snapshotDir)
|
||||
|
||||
// Try to resume the source machine in case it was left paused
|
||||
record, err := d.store.GetMachine(ctx, operation.MachineID)
|
||||
if err == nil && record.Phase == contracthost.MachinePhaseRunning && record.PID > 0 {
|
||||
_ = d.runtime.Resume(ctx, machineToRuntimeState(*record))
|
||||
}
|
||||
return d.store.DeleteOperation(ctx, operation.MachineID)
|
||||
}
|
||||
|
||||
func (d *Daemon) reconcileRestore(ctx context.Context, operation model.OperationRecord) error {
|
||||
_, err := d.store.GetMachine(ctx, operation.MachineID)
|
||||
if err == nil {
|
||||
// Restore completed, clear journal
|
||||
return d.store.DeleteOperation(ctx, operation.MachineID)
|
||||
}
|
||||
// Restore did not complete: clean up partial machine directory and disk
|
||||
_ = os.RemoveAll(filepath.Dir(d.systemVolumePath(operation.MachineID)))
|
||||
_ = os.RemoveAll(d.machineRuntimeBaseDir(operation.MachineID))
|
||||
return d.store.DeleteOperation(ctx, operation.MachineID)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue