feat: firecracker best path

This commit is contained in:
Harivansh Rathi 2026-04-10 03:17:48 +00:00
parent 74e54d4c36
commit 54a4c423a6
7 changed files with 176 additions and 28 deletions

View file

@ -20,6 +20,7 @@ type Config struct {
MachineDisksDir string
SnapshotsDir string
RuntimeDir string
DiskCloneMode DiskCloneMode
SocketPath string
HTTPAddr string
EgressInterface string
@ -28,6 +29,16 @@ type Config struct {
GuestLoginCAPublicKey string
}
// DiskCloneMode controls how the daemon materializes writable machine disks.
//
// The value is read from the FIRECRACKER_HOST_DISK_CLONE_MODE environment
// variable; an empty value selects DiskCloneModeReflink. Values other than the
// constants below are rejected by Validate.
type DiskCloneMode string
// Supported clone modes. The string values are the exact spellings accepted in
// the environment variable.
const (
// DiskCloneModeReflink requires an O(1) copy-on-write clone and never falls back to a full copy.
DiskCloneModeReflink DiskCloneMode = "reflink"
// DiskCloneModeCopy performs a full sparse copy. Use only for local development or emergency fallback.
DiskCloneModeCopy DiskCloneMode = "copy"
)
// Load loads and validates the firecracker-host daemon configuration from the environment.
func Load() (Config, error) {
rootDir := filepath.Clean(strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_ROOT_DIR")))
@ -39,6 +50,7 @@ func Load() (Config, error) {
MachineDisksDir: filepath.Join(rootDir, "machine-disks"),
SnapshotsDir: filepath.Join(rootDir, "snapshots"),
RuntimeDir: filepath.Join(rootDir, "runtime"),
DiskCloneMode: loadDiskCloneMode(os.Getenv("FIRECRACKER_HOST_DISK_CLONE_MODE")),
SocketPath: filepath.Join(rootDir, defaultSocketName),
HTTPAddr: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_HTTP_ADDR")),
EgressInterface: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_EGRESS_INTERFACE")),
@ -81,6 +93,9 @@ func (c Config) Validate() error {
if strings.TrimSpace(c.RuntimeDir) == "" {
return fmt.Errorf("runtime dir is required")
}
if err := c.DiskCloneMode.Validate(); err != nil {
return err
}
if strings.TrimSpace(c.SocketPath) == "" {
return fmt.Errorf("socket path is required")
}
@ -99,3 +114,21 @@ func (c Config) FirecrackerRuntimeConfig() firecracker.RuntimeConfig {
JailerBinaryPath: c.JailerBinaryPath,
}
}
// loadDiskCloneMode converts the raw FIRECRACKER_HOST_DISK_CLONE_MODE value
// into a DiskCloneMode.
//
// Surrounding whitespace is ignored. An empty (or whitespace-only) value
// selects DiskCloneModeReflink. Any other value is returned unchanged and is
// not checked here; callers are expected to run Validate on the result.
func loadDiskCloneMode(raw string) DiskCloneMode {
	if trimmed := strings.TrimSpace(raw); trimmed != "" {
		return DiskCloneMode(trimmed)
	}
	return DiskCloneModeReflink
}
// Validate returns nil when m is DiskCloneModeReflink or DiskCloneModeCopy.
// Any other value, including the empty string, yields an error that names the
// FIRECRACKER_HOST_DISK_CLONE_MODE variable and lists the accepted values.
func (m DiskCloneMode) Validate() error {
	if m == DiskCloneModeReflink || m == DiskCloneModeCopy {
		return nil
	}
	return fmt.Errorf("FIRECRACKER_HOST_DISK_CLONE_MODE must be %q or %q", DiskCloneModeReflink, DiskCloneModeCopy)
}

View file

@ -0,0 +1,40 @@
package config
import "testing"
// TestLoadDiskCloneModeDefaultsToReflink checks that an empty raw value maps to
// DiskCloneModeReflink.
func TestLoadDiskCloneModeDefaultsToReflink(t *testing.T) {
	t.Parallel()

	got := loadDiskCloneMode("")
	if got == DiskCloneModeReflink {
		return
	}
	t.Fatalf("disk clone mode = %q, want %q", got, DiskCloneModeReflink)
}
// TestDiskCloneModeValidate runs Validate against both supported modes plus an
// empty and an unknown value, checking only whether an error is returned.
func TestDiskCloneModeValidate(t *testing.T) {
	t.Parallel()

	type validateCase struct {
		name    string
		mode    DiskCloneMode
		wantErr bool
	}

	cases := []validateCase{
		{name: "reflink", mode: DiskCloneModeReflink},
		{name: "copy", mode: DiskCloneModeCopy},
		{name: "empty", mode: "", wantErr: true},
		{name: "unknown", mode: "auto", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			err := tc.mode.Validate()
			switch {
			case tc.wantErr && err == nil:
				t.Fatal("Validate() error = nil, want error")
			case !tc.wantErr && err != nil:
				t.Fatalf("Validate() error = %v, want nil", err)
			}
		})
	}
}

View file

@ -7,10 +7,10 @@ import (
"path/filepath"
"time"
contracthost "github.com/getcompanion-ai/computer-host/contract"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
"github.com/getcompanion-ai/computer-host/internal/model"
"github.com/getcompanion-ai/computer-host/internal/store"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachineRequest) (*contracthost.CreateMachineResponse, error) {
@ -66,8 +66,8 @@ func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachi
if err := os.MkdirAll(filepath.Dir(systemVolumePath), 0o755); err != nil {
return nil, fmt.Errorf("create system volume dir for %q: %w", req.MachineID, err)
}
if err := cowCopyFile(artifact.RootFSPath, systemVolumePath); err != nil {
return nil, err
if err := cloneDiskFile(artifact.RootFSPath, systemVolumePath, d.config.DiskCloneMode); err != nil {
return nil, fmt.Errorf("clone rootfs for %q: %w", req.MachineID, err)
}
removeSystemVolumeOnFailure := true
defer func() {

View file

@ -13,11 +13,11 @@ import (
"testing"
"time"
contracthost "github.com/getcompanion-ai/computer-host/contract"
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
"github.com/getcompanion-ai/computer-host/internal/model"
"github.com/getcompanion-ai/computer-host/internal/store"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
type fakeRuntime struct {
@ -757,6 +757,7 @@ func testConfig(root string) appconfig.Config {
MachineDisksDir: filepath.Join(root, "machine-disks"),
SnapshotsDir: filepath.Join(root, "snapshots"),
RuntimeDir: filepath.Join(root, "runtime"),
DiskCloneMode: appconfig.DiskCloneModeCopy,
SocketPath: filepath.Join(root, "firecracker-host.sock"),
EgressInterface: "eth0",
FirecrackerBinaryPath: "/usr/bin/firecracker",

View file

@ -13,7 +13,9 @@ import (
"os/exec"
"path/filepath"
"strings"
"syscall"
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
"github.com/getcompanion-ai/computer-host/internal/model"
contracthost "github.com/getcompanion-ai/computer-host/contract"
@ -92,6 +94,70 @@ func cloneFile(source string, target string) error {
return nil
}
// cloneDiskFile materializes target from source using the strategy selected by
// mode: reflinkFile for DiskCloneModeReflink, cloneFile for DiskCloneModeCopy.
// Any other mode is rejected with an error and nothing is written.
func cloneDiskFile(source string, target string, mode appconfig.DiskCloneMode) error {
	if mode == appconfig.DiskCloneModeReflink {
		return reflinkFile(source, target)
	}
	if mode == appconfig.DiskCloneModeCopy {
		return cloneFile(source, target)
	}
	return fmt.Errorf("unsupported disk clone mode %q", mode)
}
// reflinkFile clones source into target through ioctlFileClone and never falls
// back to a byte copy: if the clone ioctl fails, the error is returned.
//
// The clone is written to "<target>.tmp" (created with the source file's
// permission bits), flushed to stable storage, renamed over target, and the
// parent directory is then synced. The temporary file is removed on every
// failure before the rename succeeds.
//
// Errors are wrapped with the failing step and path; the error from syncDir is
// returned unwrapped.
func reflinkFile(source string, target string) error {
	targetDir := filepath.Dir(target)
	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return fmt.Errorf("create target dir for %q: %w", target, err)
	}

	sourceFile, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("open source file %q: %w", source, err)
	}
	defer func() {
		// Read-only handle: a close error carries no data-loss risk.
		_ = sourceFile.Close()
	}()

	sourceInfo, err := sourceFile.Stat()
	if err != nil {
		return fmt.Errorf("stat source file %q: %w", source, err)
	}

	tmpPath := target + ".tmp"
	targetFile, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, sourceInfo.Mode().Perm())
	if err != nil {
		return fmt.Errorf("open target file %q: %w", tmpPath, err)
	}

	// Until the rename succeeds the temp file is garbage on any early return.
	renamed := false
	defer func() {
		if !renamed {
			_ = os.Remove(tmpPath)
		}
	}()

	if err := ioctlFileClone(targetFile, sourceFile); err != nil {
		_ = targetFile.Close()
		return fmt.Errorf("reflink clone %q to %q: %w", source, tmpPath, err)
	}
	// Flush the cloned file before it becomes visible under its final name;
	// otherwise the directory sync below could persist the rename of a file
	// whose own metadata has not reached disk yet.
	if err := targetFile.Sync(); err != nil {
		_ = targetFile.Close()
		return fmt.Errorf("sync target file %q: %w", tmpPath, err)
	}
	if err := targetFile.Close(); err != nil {
		return fmt.Errorf("close target file %q: %w", tmpPath, err)
	}
	if err := os.Rename(tmpPath, target); err != nil {
		return fmt.Errorf("rename target file %q to %q: %w", tmpPath, target, err)
	}
	renamed = true

	return syncDir(targetDir)
}
// ioctlFileClone issues the clone ioctl on targetFile with sourceFile's
// descriptor as the argument. It returns nil on success and the raw
// syscall.Errno otherwise, so callers can wrap it and still match on the errno.
func ioctlFileClone(targetFile *os.File, sourceFile *os.File) error {
	// Request number for Linux FICLONE, i.e. _IOW(0x94, 9, int).
	// NOTE(review): this encoding holds for the common Linux ioctl layout
	// (x86, arm, riscv); confirm before building for architectures with a
	// different _IOW direction encoding.
	const ficlone uintptr = 0x40049409

	dstFD, srcFD := targetFile.Fd(), sourceFile.Fd()
	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, dstFD, ficlone, srcFD); errno != 0 {
		return errno
	}
	return nil
}
func downloadFile(ctx context.Context, rawURL string, path string) error {
if _, err := os.Stat(path); err == nil {
return nil

View file

@ -7,9 +7,11 @@ import (
"path/filepath"
"syscall"
"testing"
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
)
func TestCloneFilePreservesSparseDiskUsage(t *testing.T) {
func TestCloneDiskFileCopyPreservesSparseDiskUsage(t *testing.T) {
root := t.TempDir()
sourcePath := filepath.Join(root, "source.img")
targetPath := filepath.Join(root, "target.img")
@ -46,7 +48,7 @@ func TestCloneFilePreservesSparseDiskUsage(t *testing.T) {
t.Skip("temp filesystem does not expose sparse allocation savings")
}
if err := cloneFile(sourcePath, targetPath); err != nil {
if err := cloneDiskFile(sourcePath, targetPath, appconfig.DiskCloneModeCopy); err != nil {
t.Fatalf("clone sparse file: %v", err)
}
@ -81,6 +83,29 @@ func TestCloneFilePreservesSparseDiskUsage(t *testing.T) {
}
}
// TestCloneDiskFileReflinkMode clones a small file in reflink mode and checks
// that the target holds the same bytes. The test is skipped when the clone
// returns an error.
func TestCloneDiskFileReflinkMode(t *testing.T) {
	payload := []byte("rootfs")

	dir := t.TempDir()
	src := filepath.Join(dir, "source.img")
	dst := filepath.Join(dir, "target.img")

	if err := os.WriteFile(src, payload, 0o644); err != nil {
		t.Fatalf("write source file: %v", err)
	}

	// NOTE(review): every clone error is treated as "reflinks unsupported",
	// so a genuine failure on a reflink-capable filesystem would be reported
	// as a skip rather than a failure. Consider skipping only on the errnos
	// that indicate missing reflink support.
	if err := cloneDiskFile(src, dst, appconfig.DiskCloneModeReflink); err != nil {
		t.Skipf("temp filesystem does not support reflinks: %v", err)
	}

	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read target file: %v", err)
	}
	if !bytes.Equal(got, payload) {
		t.Fatalf("target data mismatch: %q", string(got))
	}
}
func allocatedBytes(path string) (int64, error) {
info, err := os.Stat(path)
if err != nil {

View file

@ -5,17 +5,16 @@ import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
contracthost "github.com/getcompanion-ai/computer-host/contract"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
"github.com/getcompanion-ai/computer-host/internal/model"
"github.com/getcompanion-ai/computer-host/internal/store"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
func (d *Daemon) CreateSnapshot(ctx context.Context, machineID contracthost.MachineID, req contracthost.CreateSnapshotRequest) (*contracthost.CreateSnapshotResponse, error) {
@ -104,7 +103,7 @@ func (d *Daemon) CreateSnapshot(ctx context.Context, machineID contracthost.Mach
return nil, fmt.Errorf("get system volume: %w", err)
}
systemDiskTarget := filepath.Join(snapshotDir, "system.img")
if err := cowCopyFile(systemVolume.Path, systemDiskTarget); err != nil {
if err := cloneDiskFile(systemVolume.Path, systemDiskTarget, d.config.DiskCloneMode); err != nil {
_ = d.runtime.Resume(ctx, runtimeState)
_ = os.RemoveAll(snapshotDir)
return nil, fmt.Errorf("copy system disk: %w", err)
@ -119,7 +118,7 @@ func (d *Daemon) CreateSnapshot(ctx context.Context, machineID contracthost.Mach
}
driveID := fmt.Sprintf("user-%d", i)
targetPath := filepath.Join(snapshotDir, driveID+".img")
if err := cowCopyFile(volume.Path, targetPath); err != nil {
if err := cloneDiskFile(volume.Path, targetPath, d.config.DiskCloneMode); err != nil {
_ = d.runtime.Resume(ctx, runtimeState)
_ = os.RemoveAll(snapshotDir)
return nil, fmt.Errorf("copy attached volume %q: %w", volumeID, err)
@ -303,7 +302,7 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
clearOperation = true
return nil, fmt.Errorf("snapshot %q is missing vmstate artifact", snapshotID)
}
if err := cowCopyFile(systemDiskPath.LocalPath, newSystemDiskPath); err != nil {
if err := cloneDiskFile(systemDiskPath.LocalPath, newSystemDiskPath, d.config.DiskCloneMode); err != nil {
clearOperation = true
return nil, fmt.Errorf("copy system disk for restore: %w", err)
}
@ -320,7 +319,7 @@ func (d *Daemon) RestoreSnapshot(ctx context.Context, snapshotID contracthost.Sn
driveID := strings.TrimSuffix(name, filepath.Ext(name))
volumeID := contracthost.VolumeID(fmt.Sprintf("%s-%s", req.MachineID, driveID))
volumePath := filepath.Join(d.config.MachineDisksDir, string(req.MachineID), name)
if err := cowCopyFile(restored.LocalPath, volumePath); err != nil {
if err := cloneDiskFile(restored.LocalPath, volumePath, d.config.DiskCloneMode); err != nil {
clearOperation = true
return nil, fmt.Errorf("copy restored drive %q: %w", driveID, err)
}
@ -596,19 +595,3 @@ func moveFile(src, dst string) error {
}
return os.Remove(src)
}
// cowCopyFile copies source to target by shelling out to
// `cp --reflink=auto --sparse=always`, creating the target's parent directory
// first. If cp fails, it falls back to cloneFile; an error is returned only
// when both attempts fail, and it carries the cp error, cp's combined output,
// and the fallback error.
func cowCopyFile(source string, target string) error {
	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
		return fmt.Errorf("create target dir for %q: %w", target, err)
	}

	output, err := exec.Command("cp", "--reflink=auto", "--sparse=always", source, target).CombinedOutput()
	if err == nil {
		return nil
	}

	// cp failed: try the in-process copy before giving up.
	cloneErr := cloneFile(source, target)
	if cloneErr == nil {
		return nil
	}
	return fmt.Errorf("cow copy %q to %q: cp failed: %w: %s; clone fallback failed: %w", source, target, err, string(output), cloneErr)
}