mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 08:03:40 +00:00
host api alignment (#7)
* feat: add Firecracker API client methods for VM pause/resume and snapshots
Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).
* feat: add snapshot data layer - contract types, model, store, config
Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.
* feat: add runtime methods for VM pause, resume, snapshot, and restore
Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.
* feat: add daemon snapshot create, restore, and reconciliation logic
Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.
* feat: add HTTP endpoints for snapshot create, get, list, delete, restore
Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).
* fix: cross-device rename, restore network, and snapshot cleanup
- Replace os.Rename with copy+remove for moving snapshot files out of
/proc/<pid>/root/ (cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
uses its own tap device instead of the source VM's
- Clean partial snapshot dirs immediately on failure instead of only
via reconcile
- Reject snapshot requests while a machine operation is already pending
* fix: test and modify snapshot runtime
* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
This commit is contained in:
parent
9382de7eba
commit
b5c97aef07
17 changed files with 1287 additions and 20 deletions
|
|
@ -146,6 +146,69 @@ func (c *apiClient) PutVsock(ctx context.Context, spec VsockSpec) error {
|
|||
return c.do(ctx, http.MethodPut, "/vsock", body, nil, http.StatusNoContent)
|
||||
}
|
||||
|
||||
// VmState is the state value carried in the body of a PATCH /vm request.
type VmState string

// States that PatchVm can request.
const (
	// VmStatePaused asks for the microVM to be paused.
	VmStatePaused VmState = "Paused"
	// VmStateResumed asks for the microVM to be resumed.
	VmStateResumed VmState = "Resumed"
)
|
||||
|
||||
type vmRequest struct {
|
||||
State VmState `json:"state"`
|
||||
}
|
||||
|
||||
// vmResponse is the JSON document GetVm decodes; only the "state" key is
// captured, as a plain string.
type vmResponse struct {
	State string `json:"state"`
}
|
||||
|
||||
// SnapshotCreateParams is the JSON body PutSnapshotCreate sends to
// /snapshot/create.
type SnapshotCreateParams struct {
	// MemFilePath is sent as "mem_file_path".
	MemFilePath string `json:"mem_file_path"`
	// SnapshotPath is sent as "snapshot_path".
	SnapshotPath string `json:"snapshot_path"`
	// SnapshotType is sent as "snapshot_type"; Runtime.CreateSnapshot uses "Full".
	SnapshotType string `json:"snapshot_type"`
}
|
||||
|
||||
type SnapshotLoadParams struct {
|
||||
SnapshotPath string `json:"snapshot_path"`
|
||||
MemBackend *MemBackend `json:"mem_backend,omitempty"`
|
||||
ResumeVm bool `json:"resume_vm"`
|
||||
NetworkOverrides []NetworkOverride `json:"network_overrides,omitempty"`
|
||||
VsockOverride *VsockOverride `json:"vsock_override,omitempty"`
|
||||
}
|
||||
|
||||
// MemBackend is the "mem_backend" object of a snapshot load request.
type MemBackend struct {
	// BackendType is sent as "backend_type"; Runtime.RestoreBoot uses "File".
	BackendType string `json:"backend_type"`
	// BackendPath is sent as "backend_path".
	BackendPath string `json:"backend_path"`
}
|
||||
|
||||
// NetworkOverride is one entry of the "network_overrides" list of a snapshot
// load request, pairing an interface ID with a host device name.
type NetworkOverride struct {
	// IfaceID is sent as "iface_id".
	IfaceID string `json:"iface_id"`
	// HostDevName is sent as "host_dev_name"; Runtime.RestoreBoot fills it
	// with the tap name of the restored machine's network allocation.
	HostDevName string `json:"host_dev_name"`
}
|
||||
|
||||
// VsockOverride is the "vsock_override" object of a snapshot load request.
type VsockOverride struct {
	// UDSPath is sent as "uds_path".
	UDSPath string `json:"uds_path"`
}
|
||||
|
||||
func (c *apiClient) PatchVm(ctx context.Context, state VmState) error {
|
||||
return c.do(ctx, http.MethodPatch, "/vm", vmRequest{State: state}, nil, http.StatusNoContent)
|
||||
}
|
||||
|
||||
func (c *apiClient) GetVm(ctx context.Context) (*vmResponse, error) {
|
||||
var response vmResponse
|
||||
if err := c.do(ctx, http.MethodGet, "/vm", nil, &response, http.StatusOK); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &response, nil
|
||||
}
|
||||
|
||||
func (c *apiClient) PutSnapshotCreate(ctx context.Context, params SnapshotCreateParams) error {
|
||||
return c.do(ctx, http.MethodPut, "/snapshot/create", params, nil, http.StatusNoContent)
|
||||
}
|
||||
|
||||
func (c *apiClient) PutSnapshotLoad(ctx context.Context, params SnapshotLoadParams) error {
|
||||
return c.do(ctx, http.MethodPut, "/snapshot/load", params, nil, http.StatusNoContent)
|
||||
}
|
||||
|
||||
func (c *apiClient) do(ctx context.Context, method string, endpoint string, input any, output any, wantStatus int) error {
|
||||
var body io.Reader
|
||||
if input != nil {
|
||||
|
|
|
|||
54
internal/firecracker/api_test.go
Normal file
54
internal/firecracker/api_test.go
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
package firecracker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPutSnapshotLoadIncludesNetworkOverrides(t *testing.T) {
|
||||
var (
|
||||
gotPath string
|
||||
gotBody string
|
||||
)
|
||||
|
||||
socketPath, shutdown := startUnixSocketServer(t, func(w http.ResponseWriter, r *http.Request) {
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("read request body: %v", err)
|
||||
}
|
||||
gotPath = r.URL.Path
|
||||
gotBody = string(body)
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
})
|
||||
defer shutdown()
|
||||
|
||||
client := newAPIClient(socketPath)
|
||||
err := client.PutSnapshotLoad(context.Background(), SnapshotLoadParams{
|
||||
SnapshotPath: "vmstate.bin",
|
||||
MemBackend: &MemBackend{
|
||||
BackendType: "File",
|
||||
BackendPath: "memory.bin",
|
||||
},
|
||||
ResumeVm: false,
|
||||
NetworkOverrides: []NetworkOverride{
|
||||
{
|
||||
IfaceID: "net0",
|
||||
HostDevName: "fctap7",
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("put snapshot load: %v", err)
|
||||
}
|
||||
|
||||
if gotPath != "/snapshot/load" {
|
||||
t.Fatalf("request path mismatch: got %q want %q", gotPath, "/snapshot/load")
|
||||
}
|
||||
|
||||
want := "{\"snapshot_path\":\"vmstate.bin\",\"mem_backend\":{\"backend_type\":\"File\",\"backend_path\":\"memory.bin\"},\"resume_vm\":false,\"network_overrides\":[{\"iface_id\":\"net0\",\"host_dev_name\":\"fctap7\"}]}"
|
||||
if gotBody != want {
|
||||
t.Fatalf("request body mismatch:\n got: %s\nwant: %s", gotBody, want)
|
||||
}
|
||||
}
|
||||
|
|
@ -272,3 +272,11 @@ func stagedFileName(filePath string) (string, error) {
|
|||
}
|
||||
return name, nil
|
||||
}
|
||||
|
||||
func stageSnapshotFile(sourcePath string, chrootRootDir string, name string) (string, error) {
|
||||
target := filepath.Join(chrootRootDir, name)
|
||||
if err := linkMachineFile(sourcePath, target); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return name, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,3 +67,18 @@ func buildMachinePaths(rootDir string, id MachineID, firecrackerBinaryPath strin
|
|||
func procSocketPath(pid int) string {
|
||||
return filepath.Join("/proc", strconv.Itoa(pid), "root", defaultFirecrackerSocketDir, defaultFirecrackerSocketName)
|
||||
}
|
||||
|
||||
// snapshotPaths groups the on-disk locations belonging to one snapshot.
type snapshotPaths struct {
	// BaseDir is the snapshot's own directory.
	BaseDir string
	// MemFilePath is the memory file inside BaseDir.
	MemFilePath string
	// StateFilePath is the VM state file inside BaseDir.
	StateFilePath string
}

// buildSnapshotPaths derives the paths for snapshot id under rootDir: the
// base directory is rootDir/snapshots/id, holding memory.bin and vmstate.bin.
// It only computes paths and does not touch the filesystem.
func buildSnapshotPaths(rootDir string, id string) snapshotPaths {
	paths := snapshotPaths{BaseDir: filepath.Join(rootDir, "snapshots", id)}
	paths.MemFilePath = filepath.Join(paths.BaseDir, "memory.bin")
	paths.StateFilePath = filepath.Join(paths.BaseDir, "vmstate.bin")
	return paths
}
|
||||
|
|
|
|||
|
|
@ -220,6 +220,155 @@ func (r *Runtime) Delete(ctx context.Context, state MachineState) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (r *Runtime) Pause(ctx context.Context, state MachineState) error {
|
||||
client := newAPIClient(state.SocketPath)
|
||||
return client.PatchVm(ctx, VmStatePaused)
|
||||
}
|
||||
|
||||
func (r *Runtime) Resume(ctx context.Context, state MachineState) error {
|
||||
client := newAPIClient(state.SocketPath)
|
||||
return client.PatchVm(ctx, VmStateResumed)
|
||||
}
|
||||
|
||||
func (r *Runtime) CreateSnapshot(ctx context.Context, state MachineState, paths SnapshotPaths) error {
|
||||
client := newAPIClient(state.SocketPath)
|
||||
return client.PutSnapshotCreate(ctx, SnapshotCreateParams{
|
||||
MemFilePath: paths.MemFilePath,
|
||||
SnapshotPath: paths.StateFilePath,
|
||||
SnapshotType: "Full",
|
||||
})
|
||||
}
|
||||
|
||||
func (r *Runtime) RestoreBoot(ctx context.Context, loadSpec SnapshotLoadSpec, usedNetworks []NetworkAllocation) (*MachineState, error) {
|
||||
cleanup := func(network NetworkAllocation, paths machinePaths, command *exec.Cmd, firecrackerPID int) {
|
||||
if preserveFailureArtifacts() {
|
||||
return
|
||||
}
|
||||
cleanupRunningProcess(firecrackerPID)
|
||||
cleanupStartedProcess(command)
|
||||
_ = r.networkProvisioner.Remove(context.Background(), network)
|
||||
if paths.BaseDir != "" {
|
||||
_ = os.RemoveAll(paths.BaseDir)
|
||||
}
|
||||
}
|
||||
|
||||
var network NetworkAllocation
|
||||
if loadSpec.Network != nil {
|
||||
network = *loadSpec.Network
|
||||
} else {
|
||||
var err error
|
||||
network, err = r.networkAllocator.Allocate(usedNetworks)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
paths, err := buildMachinePaths(r.rootDir, loadSpec.ID, r.firecrackerBinaryPath)
|
||||
if err != nil {
|
||||
cleanup(network, machinePaths{}, nil, 0)
|
||||
return nil, err
|
||||
}
|
||||
if err := os.MkdirAll(paths.LogDir, 0o755); err != nil {
|
||||
cleanup(network, paths, nil, 0)
|
||||
return nil, fmt.Errorf("create machine log dir %q: %w", paths.LogDir, err)
|
||||
}
|
||||
if err := r.networkProvisioner.Ensure(ctx, network); err != nil {
|
||||
cleanup(network, paths, nil, 0)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
command, err := launchJailedFirecracker(paths, loadSpec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath)
|
||||
if err != nil {
|
||||
cleanup(network, paths, nil, 0)
|
||||
return nil, err
|
||||
}
|
||||
firecrackerPID, err := waitForPIDFile(ctx, paths.PIDFilePath)
|
||||
if err != nil {
|
||||
cleanup(network, paths, command, 0)
|
||||
return nil, fmt.Errorf("wait for firecracker pid: %w", err)
|
||||
}
|
||||
|
||||
socketPath := procSocketPath(firecrackerPID)
|
||||
client := newAPIClient(socketPath)
|
||||
if err := waitForSocket(ctx, client, socketPath); err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("wait for firecracker socket: %w", err)
|
||||
}
|
||||
|
||||
// Stage snapshot files and disk images into the chroot
|
||||
chrootMemPath, err := stageSnapshotFile(loadSpec.MemFilePath, paths.ChrootRootDir, "memory.bin")
|
||||
if err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("stage memory file: %w", err)
|
||||
}
|
||||
chrootStatePath, err := stageSnapshotFile(loadSpec.SnapshotPath, paths.ChrootRootDir, "vmstate.bin")
|
||||
if err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("stage vmstate file: %w", err)
|
||||
}
|
||||
|
||||
// Stage root filesystem
|
||||
rootFSName, err := stagedFileName(loadSpec.RootFSPath)
|
||||
if err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("rootfs path: %w", err)
|
||||
}
|
||||
if err := linkMachineFile(loadSpec.RootFSPath, filepath.Join(paths.ChrootRootDir, rootFSName)); err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("link rootfs into jail: %w", err)
|
||||
}
|
||||
|
||||
// Stage additional drives
|
||||
for driveID, drivePath := range loadSpec.DiskPaths {
|
||||
driveName, err := stagedFileName(drivePath)
|
||||
if err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("drive %q path: %w", driveID, err)
|
||||
}
|
||||
if err := linkMachineFile(drivePath, filepath.Join(paths.ChrootRootDir, driveName)); err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("link drive %q into jail: %w", driveID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Load snapshot (replaces the full configure+start sequence)
|
||||
if err := client.PutSnapshotLoad(ctx, SnapshotLoadParams{
|
||||
SnapshotPath: chrootStatePath,
|
||||
MemBackend: &MemBackend{
|
||||
BackendType: "File",
|
||||
BackendPath: chrootMemPath,
|
||||
},
|
||||
ResumeVm: false,
|
||||
NetworkOverrides: []NetworkOverride{
|
||||
{
|
||||
IfaceID: network.InterfaceID,
|
||||
HostDevName: network.TapName,
|
||||
},
|
||||
},
|
||||
}); err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("load snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Resume the restored VM
|
||||
if err := client.PatchVm(ctx, VmStateResumed); err != nil {
|
||||
cleanup(network, paths, command, firecrackerPID)
|
||||
return nil, fmt.Errorf("resume restored vm: %w", err)
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
state := MachineState{
|
||||
ID: loadSpec.ID,
|
||||
Phase: PhaseRunning,
|
||||
PID: firecrackerPID,
|
||||
RuntimeHost: network.GuestIP().String(),
|
||||
SocketPath: socketPath,
|
||||
TapName: network.TapName,
|
||||
StartedAt: &now,
|
||||
}
|
||||
return &state, nil
|
||||
}
|
||||
|
||||
func processExists(pid int) bool {
|
||||
if pid < 1 {
|
||||
return false
|
||||
|
|
|
|||
|
|
@ -5,6 +5,27 @@ import "time"
|
|||
// Phase names the lifecycle phase a local microVM is currently in.
type Phase string
|
||||
|
||||
// SnapshotPaths names the two files a VM snapshot is written to.
type SnapshotPaths struct {
	// MemFilePath is where the guest memory is written.
	MemFilePath string
	// StateFilePath is where the VM state is written.
	StateFilePath string
}
|
||||
|
||||
// SnapshotLoadSpec describes what is needed to restore a VM from a snapshot.
|
||||
type SnapshotLoadSpec struct {
|
||||
ID MachineID
|
||||
SnapshotPath string
|
||||
MemFilePath string
|
||||
DiskPaths map[string]string // drive ID -> host path
|
||||
RootFSPath string
|
||||
KernelImagePath string
|
||||
VCPUs int64
|
||||
MemoryMiB int64
|
||||
KernelArgs string
|
||||
Vsock *VsockSpec
|
||||
Network *NetworkAllocation
|
||||
}
|
||||
|
||||
// MachineState describes the current host local state for a machine.
|
||||
type MachineState struct {
|
||||
ID MachineID
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue