host api alignment (#7)

* feat: add Firecracker API client methods for VM pause/resume and snapshots

Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).

* feat: add snapshot data layer - contract types, model, store, config

Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.

* feat: add runtime methods for VM pause, resume, snapshot, and restore

Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.

* feat: add daemon snapshot create, restore, and reconciliation logic

Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.

* feat: add HTTP endpoints for snapshot create, get, list, delete, restore

Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).

* fix: cross-device rename, restore network, and snapshot cleanup

- Replace os.Rename with copy+remove when moving snapshot files out of
  /proc/<pid>/root/ (os.Rename fails there with a cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
  uses its own tap device instead of the source VM's
- Clean up partial snapshot directories immediately on failure instead of
  relying only on reconciliation
- Reject snapshot requests while a machine operation is already pending

* fix: test and modify snapshot runtime

* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
This commit is contained in:
Hari 2026-04-08 22:21:46 -04:00 committed by GitHub
parent 9382de7eba
commit b5c97aef07
17 changed files with 1287 additions and 20 deletions

View file

@ -17,6 +17,11 @@ type Service interface {
StopMachine(context.Context, contracthost.MachineID) error
DeleteMachine(context.Context, contracthost.MachineID) error
Health(context.Context) (*contracthost.HealthResponse, error)
CreateSnapshot(context.Context, contracthost.MachineID) (*contracthost.CreateSnapshotResponse, error)
ListSnapshots(context.Context, contracthost.MachineID) (*contracthost.ListSnapshotsResponse, error)
GetSnapshot(context.Context, contracthost.SnapshotID) (*contracthost.GetSnapshotResponse, error)
DeleteSnapshotByID(context.Context, contracthost.SnapshotID) error
RestoreSnapshot(context.Context, contracthost.SnapshotID, contracthost.RestoreSnapshotRequest) (*contracthost.RestoreSnapshotResponse, error)
}
type Handler struct {
@ -35,6 +40,7 @@ func (h *Handler) Routes() http.Handler {
mux.HandleFunc("/health", h.handleHealth)
mux.HandleFunc("/machines", h.handleMachines)
mux.HandleFunc("/machines/", h.handleMachine)
mux.HandleFunc("/snapshots/", h.handleSnapshot)
return mux
}
@ -120,6 +126,80 @@ func (h *Handler) handleMachine(w http.ResponseWriter, r *http.Request) {
return
}
if len(parts) == 2 && parts[1] == "snapshots" {
switch r.Method {
case http.MethodGet:
response, err := h.service.ListSnapshots(r.Context(), machineID)
if err != nil {
writeError(w, statusForError(err), err)
return
}
writeJSON(w, http.StatusOK, response)
case http.MethodPost:
response, err := h.service.CreateSnapshot(r.Context(), machineID)
if err != nil {
writeError(w, statusForError(err), err)
return
}
writeJSON(w, http.StatusCreated, response)
default:
writeMethodNotAllowed(w)
}
return
}
writeError(w, http.StatusNotFound, fmt.Errorf("route not found"))
}
// handleSnapshot dispatches requests whose path starts with /snapshots/.
//
// The first path segment after the prefix is taken as the snapshot ID and the
// remaining shape of the path selects the operation:
//
//	GET    /snapshots/{id}          -> service.GetSnapshot        (200 + JSON)
//	DELETE /snapshots/{id}          -> service.DeleteSnapshotByID (204, no body)
//	POST   /snapshots/{id}/restore  -> service.RestoreSnapshot    (201 + JSON)
//
// A known route hit with any other method is answered via
// writeMethodNotAllowed. An empty remainder or an unrecognised path shape is
// answered with 404. Service errors are mapped to a status by statusForError;
// a restore body that fails JSON decoding yields 400.
func (h *Handler) handleSnapshot(w http.ResponseWriter, r *http.Request) {
	rest := strings.TrimPrefix(r.URL.Path, "/snapshots/")
	if rest == "" {
		writeError(w, http.StatusNotFound, fmt.Errorf("snapshot id is required"))
		return
	}

	segments := strings.Split(rest, "/")
	id := contracthost.SnapshotID(segments[0])

	// Classify the path once so the switch below reads as a routing table.
	isResource := len(segments) == 1
	isRestore := len(segments) == 2 && segments[1] == "restore"

	switch {
	case isResource && r.Method == http.MethodGet:
		snapshot, err := h.service.GetSnapshot(r.Context(), id)
		if err != nil {
			writeError(w, statusForError(err), err)
			return
		}
		writeJSON(w, http.StatusOK, snapshot)

	case isResource && r.Method == http.MethodDelete:
		err := h.service.DeleteSnapshotByID(r.Context(), id)
		if err != nil {
			writeError(w, statusForError(err), err)
			return
		}
		w.WriteHeader(http.StatusNoContent)

	case isRestore && r.Method == http.MethodPost:
		var body contracthost.RestoreSnapshotRequest
		if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
			writeError(w, http.StatusBadRequest, decodeErr)
			return
		}
		restored, err := h.service.RestoreSnapshot(r.Context(), id, body)
		if err != nil {
			writeError(w, statusForError(err), err)
			return
		}
		writeJSON(w, http.StatusCreated, restored)

	case isResource || isRestore:
		// The path matched a route, but none of the method cases above did.
		writeMethodNotAllowed(w)

	default:
		writeError(w, http.StatusNotFound, fmt.Errorf("route not found"))
	}
}