mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-15 03:00:42 +00:00
* feat: add Firecracker API client methods for VM pause/resume and snapshots
Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).
* feat: add snapshot data layer - contract types, model, store, config
Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.
* feat: add runtime methods for VM pause, resume, snapshot, and restore
Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.
* feat: add daemon snapshot create, restore, and reconciliation logic
Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.
* feat: add HTTP endpoints for snapshot create, get, list, delete, restore
Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).
* fix: cross-device rename, restore network, and snapshot cleanup
- Replace os.Rename with copy+remove for moving snapshot files out of
/proc/<pid>/root/ (cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
uses its own tap device instead of the source VM's
- Clean partial snapshot dirs immediately on failure instead of only
via reconcile
- Reject snapshot requests while a machine operation is already pending
* fix: test and modify snapshot runtime
* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
282 lines
7.8 KiB
Go
282 lines
7.8 KiB
Go
package firecracker
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
const (
	// defaultCgroupVersion is the value passed to the jailer's
	// --cgroup-version flag.
	defaultCgroupVersion = "2"

	// defaultFirecrackerInitTimeout bounds how long launch code waits for
	// the Firecracker API socket and pid file to appear after starting
	// the jailer.
	defaultFirecrackerInitTimeout = 10 * time.Second

	// defaultFirecrackerLogLevel is passed to Firecracker's --level flag.
	defaultFirecrackerLogLevel = "Warning"

	// defaultFirecrackerPollInterval is the cadence used when polling for
	// the API socket and the pid file.
	defaultFirecrackerPollInterval = 10 * time.Millisecond

	// defaultRootDriveID names the root block device in the Firecracker
	// machine configuration.
	defaultRootDriveID = "root_drive"

	// defaultVSockRunDir is the in-jail directory where the vsock unix
	// socket is placed (a path as seen from inside the chroot).
	defaultVSockRunDir = "/run"
)
|
|
|
|
func configureMachine(ctx context.Context, client *apiClient, paths machinePaths, spec MachineSpec, network NetworkAllocation) error {
|
|
if err := client.PutMachineConfig(ctx, spec); err != nil {
|
|
return fmt.Errorf("put machine config: %w", err)
|
|
}
|
|
if err := client.PutBootSource(ctx, spec); err != nil {
|
|
return fmt.Errorf("put boot source: %w", err)
|
|
}
|
|
for _, drive := range additionalDriveRequests(spec) {
|
|
if err := client.PutDrive(ctx, drive); err != nil {
|
|
return fmt.Errorf("put drive %q: %w", drive.DriveID, err)
|
|
}
|
|
}
|
|
if err := client.PutDrive(ctx, rootDriveRequest(spec)); err != nil {
|
|
return fmt.Errorf("put root drive: %w", err)
|
|
}
|
|
if err := client.PutNetworkInterface(ctx, network); err != nil {
|
|
return fmt.Errorf("put network interface: %w", err)
|
|
}
|
|
if err := client.PutEntropy(ctx); err != nil {
|
|
return fmt.Errorf("put entropy device: %w", err)
|
|
}
|
|
if err := client.PutSerial(ctx, paths.JailedSerialLogPath); err != nil {
|
|
return fmt.Errorf("put serial device: %w", err)
|
|
}
|
|
if spec.Vsock != nil {
|
|
if err := client.PutVsock(ctx, *spec.Vsock); err != nil {
|
|
return fmt.Errorf("put vsock: %w", err)
|
|
}
|
|
}
|
|
if err := client.PutAction(ctx, defaultStartAction); err != nil {
|
|
return fmt.Errorf("start instance: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecrackerBinaryPath string, jailerBinaryPath string) (*exec.Cmd, error) {
|
|
command := exec.Command(
|
|
jailerBinaryPath,
|
|
"--id", string(machineID),
|
|
"--uid", strconv.Itoa(os.Getuid()),
|
|
"--gid", strconv.Itoa(os.Getgid()),
|
|
"--exec-file", firecrackerBinaryPath,
|
|
"--cgroup-version", defaultCgroupVersion,
|
|
"--chroot-base-dir", paths.JailerBaseDir,
|
|
"--daemonize",
|
|
"--new-pid-ns",
|
|
"--",
|
|
"--api-sock", defaultFirecrackerSocketPath,
|
|
"--log-path", paths.JailedFirecrackerLogPath,
|
|
"--level", defaultFirecrackerLogLevel,
|
|
"--show-level",
|
|
"--show-log-origin",
|
|
)
|
|
if err := command.Start(); err != nil {
|
|
return nil, fmt.Errorf("start jailer: %w", err)
|
|
}
|
|
go func() {
|
|
_ = command.Wait()
|
|
}()
|
|
return command, nil
|
|
}
|
|
|
|
func stageMachineFiles(spec MachineSpec, paths machinePaths) (MachineSpec, error) {
|
|
staged := spec
|
|
|
|
kernelImagePath, err := stagedFileName(spec.KernelImagePath)
|
|
if err != nil {
|
|
return MachineSpec{}, fmt.Errorf("kernel image path: %w", err)
|
|
}
|
|
if err := linkMachineFile(spec.KernelImagePath, filepath.Join(paths.ChrootRootDir, kernelImagePath)); err != nil {
|
|
return MachineSpec{}, fmt.Errorf("link kernel image into jail: %w", err)
|
|
}
|
|
staged.KernelImagePath = kernelImagePath
|
|
|
|
rootFSPath, err := stagedFileName(spec.RootFSPath)
|
|
if err != nil {
|
|
return MachineSpec{}, fmt.Errorf("rootfs path: %w", err)
|
|
}
|
|
if err := linkMachineFile(spec.RootFSPath, filepath.Join(paths.ChrootRootDir, rootFSPath)); err != nil {
|
|
return MachineSpec{}, fmt.Errorf("link rootfs into jail: %w", err)
|
|
}
|
|
staged.RootFSPath = rootFSPath
|
|
|
|
staged.Drives = make([]DriveSpec, len(spec.Drives))
|
|
for i, drive := range spec.Drives {
|
|
stagedDrive := drive
|
|
stagedDrivePath, err := stagedFileName(drive.Path)
|
|
if err != nil {
|
|
return MachineSpec{}, fmt.Errorf("drive %q path: %w", drive.ID, err)
|
|
}
|
|
if err := linkMachineFile(drive.Path, filepath.Join(paths.ChrootRootDir, stagedDrivePath)); err != nil {
|
|
return MachineSpec{}, fmt.Errorf("link drive %q into jail: %w", drive.ID, err)
|
|
}
|
|
stagedDrive.Path = stagedDrivePath
|
|
staged.Drives[i] = stagedDrive
|
|
}
|
|
|
|
if spec.Vsock != nil {
|
|
vsock := *spec.Vsock
|
|
vsock.Path = jailedVSockPath(spec)
|
|
staged.Vsock = &vsock
|
|
}
|
|
|
|
return staged, nil
|
|
}
|
|
|
|
func waitForSocket(ctx context.Context, client *apiClient, socketPath string) error {
|
|
waitContext, cancel := context.WithTimeout(ctx, defaultFirecrackerInitTimeout)
|
|
defer cancel()
|
|
|
|
ticker := time.NewTicker(defaultFirecrackerPollInterval)
|
|
defer ticker.Stop()
|
|
|
|
var lastStatErr error
|
|
var lastPingErr error
|
|
|
|
for {
|
|
select {
|
|
case <-waitContext.Done():
|
|
switch {
|
|
case lastPingErr != nil:
|
|
return fmt.Errorf("%w (socket=%q last_ping_err=%v)", waitContext.Err(), socketPath, lastPingErr)
|
|
case lastStatErr != nil:
|
|
return fmt.Errorf("%w (socket=%q last_stat_err=%v)", waitContext.Err(), socketPath, lastStatErr)
|
|
default:
|
|
return fmt.Errorf("%w (socket=%q)", waitContext.Err(), socketPath)
|
|
}
|
|
case <-ticker.C:
|
|
if _, err := os.Stat(socketPath); err != nil {
|
|
if os.IsNotExist(err) {
|
|
lastStatErr = err
|
|
continue
|
|
}
|
|
return fmt.Errorf("stat socket %q: %w", socketPath, err)
|
|
}
|
|
lastStatErr = nil
|
|
if err := client.Ping(waitContext); err != nil {
|
|
lastPingErr = err
|
|
continue
|
|
}
|
|
lastPingErr = nil
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
func additionalDriveRequests(spec MachineSpec) []driveRequest {
|
|
requests := make([]driveRequest, 0, len(spec.Drives))
|
|
for _, drive := range spec.Drives {
|
|
requests = append(requests, driveRequest{
|
|
DriveID: drive.ID,
|
|
IsReadOnly: drive.ReadOnly,
|
|
IsRootDevice: false,
|
|
PathOnHost: drive.Path,
|
|
})
|
|
}
|
|
return requests
|
|
}
|
|
|
|
func cleanupStartedProcess(command *exec.Cmd) {
|
|
if command == nil || command.Process == nil {
|
|
return
|
|
}
|
|
_ = command.Process.Kill()
|
|
}
|
|
|
|
func readPIDFile(pidFilePath string) (int, error) {
|
|
payload, err := os.ReadFile(pidFilePath)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
pid, err := strconv.Atoi(strings.TrimSpace(string(payload)))
|
|
if err != nil {
|
|
return 0, fmt.Errorf("parse pid file %q: %w", pidFilePath, err)
|
|
}
|
|
if pid < 1 {
|
|
return 0, fmt.Errorf("pid file %q must contain a positive pid", pidFilePath)
|
|
}
|
|
return pid, nil
|
|
}
|
|
|
|
func waitForPIDFile(ctx context.Context, pidFilePath string) (int, error) {
|
|
waitContext, cancel := context.WithTimeout(ctx, defaultFirecrackerInitTimeout)
|
|
defer cancel()
|
|
|
|
ticker := time.NewTicker(defaultFirecrackerPollInterval)
|
|
defer ticker.Stop()
|
|
|
|
var lastErr error
|
|
for {
|
|
select {
|
|
case <-waitContext.Done():
|
|
if lastErr != nil {
|
|
return 0, fmt.Errorf("%w (pid_file=%q last_err=%v)", waitContext.Err(), pidFilePath, lastErr)
|
|
}
|
|
return 0, fmt.Errorf("%w (pid_file=%q)", waitContext.Err(), pidFilePath)
|
|
case <-ticker.C:
|
|
pid, err := readPIDFile(pidFilePath)
|
|
if err == nil {
|
|
return pid, nil
|
|
}
|
|
lastErr = err
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return 0, err
|
|
}
|
|
}
|
|
}
|
|
|
|
func hostVSockPath(paths machinePaths, spec MachineSpec) string {
|
|
if spec.Vsock == nil {
|
|
return ""
|
|
}
|
|
return filepath.Join(paths.ChrootRootDir, defaultFirecrackerSocketDir, filepath.Base(strings.TrimSpace(spec.Vsock.Path)))
|
|
}
|
|
|
|
func jailedVSockPath(spec MachineSpec) string {
|
|
if spec.Vsock == nil {
|
|
return ""
|
|
}
|
|
return path.Join(defaultVSockRunDir, filepath.Base(strings.TrimSpace(spec.Vsock.Path)))
|
|
}
|
|
|
|
func linkMachineFile(source string, target string) error {
|
|
resolvedSource, err := filepath.EvalSymlinks(source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.Link(resolvedSource, target); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func rootDriveRequest(spec MachineSpec) driveRequest {
|
|
return driveRequest{
|
|
DriveID: defaultRootDriveID,
|
|
IsReadOnly: false,
|
|
IsRootDevice: true,
|
|
PathOnHost: spec.RootFSPath,
|
|
}
|
|
}
|
|
|
|
// stagedFileName reduces a host file path to the bare file name used inside
// the jail's chroot. It rejects inputs that carry no usable name: empty or
// whitespace-only paths ("." after Base) and the bare path separator.
func stagedFileName(filePath string) (string, error) {
	base := filepath.Base(strings.TrimSpace(filePath))
	switch base {
	case "", ".", string(filepath.Separator):
		return "", fmt.Errorf("file path is required")
	}
	return base, nil
}
|
|
|
|
func stageSnapshotFile(sourcePath string, chrootRootDir string, name string) (string, error) {
|
|
target := filepath.Join(chrootRootDir, name)
|
|
if err := linkMachineFile(sourcePath, target); err != nil {
|
|
return "", err
|
|
}
|
|
return name, nil
|
|
}
|