host api alignment (#7)

* feat: add Firecracker API client methods for VM pause/resume and snapshots

Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).

* feat: add snapshot data layer - contract types, model, store, config

Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.

* feat: add runtime methods for VM pause, resume, snapshot, and restore

Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.

* feat: add daemon snapshot create, restore, and reconciliation logic

Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.

* feat: add HTTP endpoints for snapshot create, get, list, delete, restore

Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).

* fix: cross-device rename, restore network, and snapshot cleanup

- Replace os.Rename with copy+remove for moving snapshot files out of
  /proc/<pid>/root/ (cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
  uses its own tap device instead of the source VM's
- Clean partial snapshot dirs immediately on failure instead of only
  via reconcile
- Reject snapshot requests while a machine operation is already pending

* fix: test and modify snapshot runtime

* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
This commit is contained in:
Hari 2026-04-08 22:21:46 -04:00 committed by GitHub
parent 9382de7eba
commit b5c97aef07
17 changed files with 1287 additions and 20 deletions

View file

@ -23,9 +23,10 @@ type persistedOperations struct {
}
// persistedState groups the record slices that are serialized under the JSON
// keys "artifacts", "machines", "volumes", and "snapshots".
//
// NOTE(review): Artifacts, Machines, and Volumes each appear twice below. This
// looks like the before/after lines of a diff rendered together rather than
// the real declaration (Go rejects duplicate field names) — confirm against
// the actual file; the intended struct presumably has one of each plus
// Snapshots.
type persistedState struct {
Artifacts []model.ArtifactRecord `json:"artifacts"`
Machines []model.MachineRecord `json:"machines"`
Volumes []model.VolumeRecord `json:"volumes"`
Artifacts []model.ArtifactRecord `json:"artifacts"`
Machines []model.MachineRecord `json:"machines"`
Volumes []model.VolumeRecord `json:"volumes"`
// Snapshots is the field added alongside the snapshot store methods.
Snapshots []model.SnapshotRecord `json:"snapshots"`
}
func NewFileStore(statePath string, operationsPath string) (*FileStore, error) {
@ -274,6 +275,73 @@ func (s *FileStore) DeleteOperation(_ context.Context, machineID contracthost.Ma
})
}
// CreateSnapshot appends record to the stored snapshot list.
//
// It holds s.mu for the whole call and applies the change through
// s.updateState. If a snapshot with the same ID is already present, the
// callback returns an "already exists" error without appending. The context
// argument is ignored.
func (s *FileStore) CreateSnapshot(_ context.Context, record model.SnapshotRecord) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	insert := func(state *persistedState) error {
		// Reject duplicates before touching the slice.
		for i := range state.Snapshots {
			if state.Snapshots[i].ID != record.ID {
				continue
			}
			return fmt.Errorf("store: snapshot %q already exists", record.ID)
		}
		state.Snapshots = append(state.Snapshots, record)
		return nil
	}
	return s.updateState(insert)
}
// GetSnapshot looks up the snapshot record whose ID equals id.
//
// It holds s.mu while reading state via s.readState. On a match it returns a
// pointer to a copy of the record, so callers do not alias the slice element.
// It returns ErrNotFound when no record matches, and passes through any error
// from s.readState. The context argument is ignored.
func (s *FileStore) GetSnapshot(_ context.Context, id contracthost.SnapshotID) (*model.SnapshotRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	state, err := s.readState()
	if err != nil {
		return nil, err
	}

	for _, snap := range state.Snapshots {
		if snap.ID != id {
			continue
		}
		// Copy into a fresh variable so the returned pointer is independent
		// of both the slice and the loop variable.
		found := snap
		return &found, nil
	}
	return nil, ErrNotFound
}
// ListSnapshotsByMachine returns every stored snapshot record whose MachineID
// equals machineID, in stored order.
//
// It holds s.mu while reading state via s.readState and passes through any
// error from that call. When nothing matches, the result is an empty, non-nil
// slice. The context argument is ignored.
func (s *FileStore) ListSnapshotsByMachine(_ context.Context, machineID contracthost.MachineID) ([]model.SnapshotRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	state, err := s.readState()
	if err != nil {
		return nil, err
	}

	// Start from an empty (non-nil) slice so the no-match case never yields nil.
	matches := []model.SnapshotRecord{}
	for i := range state.Snapshots {
		if state.Snapshots[i].MachineID != machineID {
			continue
		}
		matches = append(matches, state.Snapshots[i])
	}
	return matches, nil
}
// DeleteSnapshot removes the first stored snapshot record whose ID equals id.
//
// It holds s.mu for the whole call and applies the change through
// s.updateState. The callback returns ErrNotFound when no record matches.
// The context argument is ignored.
func (s *FileStore) DeleteSnapshot(_ context.Context, id contracthost.SnapshotID) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	remove := func(state *persistedState) error {
		// Locate the first matching record.
		idx := -1
		for pos := range state.Snapshots {
			if state.Snapshots[pos].ID == id {
				idx = pos
				break
			}
		}
		if idx < 0 {
			return ErrNotFound
		}

		// Close the gap in place, preserving the order of the remaining records.
		state.Snapshots = append(state.Snapshots[:idx], state.Snapshots[idx+1:]...)
		return nil
	}
	return s.updateState(remove)
}
func (s *FileStore) readOperations() (*persistedOperations, error) {
var operations persistedOperations
if err := readJSONFile(s.operationsPath, &operations); err != nil {
@ -387,6 +455,7 @@ func emptyPersistedState() persistedState {
Artifacts: []model.ArtifactRecord{},
Machines: []model.MachineRecord{},
Volumes: []model.VolumeRecord{},
Snapshots: []model.SnapshotRecord{},
}
}
@ -404,6 +473,9 @@ func normalizeState(state *persistedState) {
if state.Volumes == nil {
state.Volumes = []model.VolumeRecord{}
}
if state.Snapshots == nil {
state.Snapshots = []model.SnapshotRecord{}
}
}
func normalizeOperations(operations *persistedOperations) {

View file

@ -27,4 +27,8 @@ type Store interface {
UpsertOperation(context.Context, model.OperationRecord) error
ListOperations(context.Context) ([]model.OperationRecord, error)
DeleteOperation(context.Context, contracthost.MachineID) error
CreateSnapshot(context.Context, model.SnapshotRecord) error
GetSnapshot(context.Context, contracthost.SnapshotID) (*model.SnapshotRecord, error)
ListSnapshotsByMachine(context.Context, contracthost.MachineID) ([]model.SnapshotRecord, error)
DeleteSnapshot(context.Context, contracthost.SnapshotID) error
}