computer-host/internal/store/file_store.go
Hari b5c97aef07 host api alignment (#7)
* feat: add Firecracker API client methods for VM pause/resume and snapshots

Add PatchVm, GetVm, PutSnapshotCreate, and PutSnapshotLoad methods to the
API client, along with supporting types (VmState, SnapshotCreateParams,
SnapshotLoadParams, MemBackend).

* feat: add snapshot data layer - contract types, model, store, config

Add SnapshotID and snapshot contract types, SnapshotRecord model,
store interface CRUD methods with file store implementation,
snapshot paths helper, SnapshotsDir config, and directory creation.

* feat: add runtime methods for VM pause, resume, snapshot, and restore

Implement Pause, Resume, CreateSnapshot, and RestoreBoot on the
firecracker Runtime. RestoreBoot launches a jailer, stages snapshot
files into the chroot, loads the snapshot, and resumes the VM.

* feat: add daemon snapshot create, restore, and reconciliation logic

Implement CreateSnapshot (pause, snapshot, COW-copy disk, resume),
RestoreSnapshot (COW-copy disk, RestoreBoot, wait for guest),
GetSnapshot, ListSnapshots, DeleteSnapshotByID, and crash recovery
reconciliation for snapshot and restore operations.

* feat: add HTTP endpoints for snapshot create, get, list, delete, restore

Wire 5 snapshot routes: POST /machines/{id}/snapshots (create),
GET /machines/{id}/snapshots (list), GET /snapshots/{id} (get),
DELETE /snapshots/{id} (delete), POST /snapshots/{id}/restore (restore).

* fix: cross-device rename, restore network, and snapshot cleanup

- Replace os.Rename with copy+remove for moving snapshot files out of
  /proc/<pid>/root/ (cross-device link error on Linux)
- Reconfigure network interface after snapshot load so the restored VM
  uses its own tap device instead of the source VM's
- Clean partial snapshot dirs immediately on failure instead of only
  via reconcile
- Reject snapshot requests while a machine operation is already pending

* fix: test and modify snapshot runtime

* feat: snapshot lifecycle update, align runtime issues between host image
and daemon
2026-04-08 22:21:46 -04:00

485 lines
12 KiB
Go

package store
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/getcompanion-ai/computer-host/internal/model"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
type FileStore struct {
mu sync.Mutex
statePath string
operationsPath string
}
type persistedOperations struct {
Operations []model.OperationRecord `json:"operations"`
}
type persistedState struct {
Artifacts []model.ArtifactRecord `json:"artifacts"`
Machines []model.MachineRecord `json:"machines"`
Volumes []model.VolumeRecord `json:"volumes"`
Snapshots []model.SnapshotRecord `json:"snapshots"`
}
func NewFileStore(statePath string, operationsPath string) (*FileStore, error) {
store := &FileStore{
statePath: filepath.Clean(statePath),
operationsPath: filepath.Clean(operationsPath),
}
if err := initializeJSONFile(store.statePath, emptyPersistedState()); err != nil {
return nil, err
}
if err := initializeJSONFile(store.operationsPath, emptyPersistedOperations()); err != nil {
return nil, err
}
return store, nil
}
func (s *FileStore) PutArtifact(_ context.Context, record model.ArtifactRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Artifacts {
if state.Artifacts[i].Ref == record.Ref {
state.Artifacts[i] = record
return nil
}
}
state.Artifacts = append(state.Artifacts, record)
return nil
})
}
func (s *FileStore) GetArtifact(_ context.Context, ref contracthost.ArtifactRef) (*model.ArtifactRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
for i := range state.Artifacts {
if state.Artifacts[i].Ref == ref {
record := state.Artifacts[i]
return &record, nil
}
}
return nil, ErrNotFound
}
func (s *FileStore) ListArtifacts(_ context.Context) ([]model.ArtifactRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
return append([]model.ArtifactRecord(nil), state.Artifacts...), nil
}
func (s *FileStore) CreateMachine(_ context.Context, record model.MachineRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for _, machine := range state.Machines {
if machine.ID == record.ID {
return fmt.Errorf("store: machine %q already exists", record.ID)
}
}
state.Machines = append(state.Machines, record)
return nil
})
}
func (s *FileStore) GetMachine(_ context.Context, id contracthost.MachineID) (*model.MachineRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
for i := range state.Machines {
if state.Machines[i].ID == id {
record := state.Machines[i]
return &record, nil
}
}
return nil, ErrNotFound
}
func (s *FileStore) ListMachines(_ context.Context) ([]model.MachineRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
return append([]model.MachineRecord(nil), state.Machines...), nil
}
func (s *FileStore) UpdateMachine(_ context.Context, record model.MachineRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Machines {
if state.Machines[i].ID == record.ID {
state.Machines[i] = record
return nil
}
}
return ErrNotFound
})
}
func (s *FileStore) DeleteMachine(_ context.Context, id contracthost.MachineID) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Machines {
if state.Machines[i].ID == id {
state.Machines = append(state.Machines[:i], state.Machines[i+1:]...)
return nil
}
}
return ErrNotFound
})
}
func (s *FileStore) CreateVolume(_ context.Context, record model.VolumeRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for _, volume := range state.Volumes {
if volume.ID == record.ID {
return fmt.Errorf("store: volume %q already exists", record.ID)
}
}
state.Volumes = append(state.Volumes, record)
return nil
})
}
func (s *FileStore) GetVolume(_ context.Context, id contracthost.VolumeID) (*model.VolumeRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
for i := range state.Volumes {
if state.Volumes[i].ID == id {
record := state.Volumes[i]
return &record, nil
}
}
return nil, ErrNotFound
}
func (s *FileStore) ListVolumes(_ context.Context) ([]model.VolumeRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
return append([]model.VolumeRecord(nil), state.Volumes...), nil
}
func (s *FileStore) UpdateVolume(_ context.Context, record model.VolumeRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Volumes {
if state.Volumes[i].ID == record.ID {
state.Volumes[i] = record
return nil
}
}
return ErrNotFound
})
}
func (s *FileStore) DeleteVolume(_ context.Context, id contracthost.VolumeID) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Volumes {
if state.Volumes[i].ID == id {
state.Volumes = append(state.Volumes[:i], state.Volumes[i+1:]...)
return nil
}
}
return ErrNotFound
})
}
func (s *FileStore) UpsertOperation(_ context.Context, record model.OperationRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateOperations(func(operations *persistedOperations) error {
for i := range operations.Operations {
if operations.Operations[i].MachineID == record.MachineID {
operations.Operations[i] = record
return nil
}
}
operations.Operations = append(operations.Operations, record)
return nil
})
}
func (s *FileStore) ListOperations(_ context.Context) ([]model.OperationRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
operations, err := s.readOperations()
if err != nil {
return nil, err
}
return append([]model.OperationRecord(nil), operations.Operations...), nil
}
func (s *FileStore) DeleteOperation(_ context.Context, machineID contracthost.MachineID) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateOperations(func(operations *persistedOperations) error {
for i := range operations.Operations {
if operations.Operations[i].MachineID == machineID {
operations.Operations = append(operations.Operations[:i], operations.Operations[i+1:]...)
return nil
}
}
return nil
})
}
func (s *FileStore) CreateSnapshot(_ context.Context, record model.SnapshotRecord) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for _, snap := range state.Snapshots {
if snap.ID == record.ID {
return fmt.Errorf("store: snapshot %q already exists", record.ID)
}
}
state.Snapshots = append(state.Snapshots, record)
return nil
})
}
func (s *FileStore) GetSnapshot(_ context.Context, id contracthost.SnapshotID) (*model.SnapshotRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
for i := range state.Snapshots {
if state.Snapshots[i].ID == id {
record := state.Snapshots[i]
return &record, nil
}
}
return nil, ErrNotFound
}
func (s *FileStore) ListSnapshotsByMachine(_ context.Context, machineID contracthost.MachineID) ([]model.SnapshotRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
state, err := s.readState()
if err != nil {
return nil, err
}
var result []model.SnapshotRecord
for _, snap := range state.Snapshots {
if snap.MachineID == machineID {
result = append(result, snap)
}
}
if result == nil {
result = []model.SnapshotRecord{}
}
return result, nil
}
func (s *FileStore) DeleteSnapshot(_ context.Context, id contracthost.SnapshotID) error {
s.mu.Lock()
defer s.mu.Unlock()
return s.updateState(func(state *persistedState) error {
for i := range state.Snapshots {
if state.Snapshots[i].ID == id {
state.Snapshots = append(state.Snapshots[:i], state.Snapshots[i+1:]...)
return nil
}
}
return ErrNotFound
})
}
func (s *FileStore) readOperations() (*persistedOperations, error) {
var operations persistedOperations
if err := readJSONFile(s.operationsPath, &operations); err != nil {
return nil, err
}
normalizeOperations(&operations)
return &operations, nil
}
func (s *FileStore) readState() (*persistedState, error) {
var state persistedState
if err := readJSONFile(s.statePath, &state); err != nil {
return nil, err
}
normalizeState(&state)
return &state, nil
}
func (s *FileStore) updateOperations(update func(*persistedOperations) error) error {
operations, err := s.readOperations()
if err != nil {
return err
}
if err := update(operations); err != nil {
return err
}
return writeJSONFileAtomically(s.operationsPath, operations)
}
func (s *FileStore) updateState(update func(*persistedState) error) error {
state, err := s.readState()
if err != nil {
return err
}
if err := update(state); err != nil {
return err
}
return writeJSONFileAtomically(s.statePath, state)
}
func initializeJSONFile(path string, value any) error {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return fmt.Errorf("create store dir for %q: %w", path, err)
}
if _, err := os.Stat(path); err == nil {
return nil
} else if !os.IsNotExist(err) {
return fmt.Errorf("stat store file %q: %w", path, err)
}
return writeJSONFileAtomically(path, value)
}
func readJSONFile(path string, value any) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("read store file %q: %w", path, err)
}
if err := json.Unmarshal(data, value); err != nil {
return fmt.Errorf("decode store file %q: %w", path, err)
}
return nil
}
func writeJSONFileAtomically(path string, value any) error {
payload, err := json.MarshalIndent(value, "", " ")
if err != nil {
return fmt.Errorf("marshal store file %q: %w", path, err)
}
payload = append(payload, '\n')
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return fmt.Errorf("create store dir for %q: %w", path, err)
}
tmpPath := path + ".tmp"
file, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
if err != nil {
return fmt.Errorf("open temp store file %q: %w", tmpPath, err)
}
if _, err := file.Write(payload); err != nil {
file.Close()
return fmt.Errorf("write temp store file %q: %w", tmpPath, err)
}
if err := file.Sync(); err != nil {
file.Close()
return fmt.Errorf("sync temp store file %q: %w", tmpPath, err)
}
if err := file.Close(); err != nil {
return fmt.Errorf("close temp store file %q: %w", tmpPath, err)
}
if err := os.Rename(tmpPath, path); err != nil {
return fmt.Errorf("rename temp store file %q to %q: %w", tmpPath, path, err)
}
dir, err := os.Open(filepath.Dir(path))
if err != nil {
return fmt.Errorf("open store dir for %q: %w", path, err)
}
if err := dir.Sync(); err != nil {
dir.Close()
return fmt.Errorf("sync store dir for %q: %w", path, err)
}
if err := dir.Close(); err != nil {
return fmt.Errorf("close store dir for %q: %w", path, err)
}
return nil
}
func emptyPersistedState() persistedState {
return persistedState{
Artifacts: []model.ArtifactRecord{},
Machines: []model.MachineRecord{},
Volumes: []model.VolumeRecord{},
Snapshots: []model.SnapshotRecord{},
}
}
func emptyPersistedOperations() persistedOperations {
return persistedOperations{Operations: []model.OperationRecord{}}
}
func normalizeState(state *persistedState) {
if state.Artifacts == nil {
state.Artifacts = []model.ArtifactRecord{}
}
if state.Machines == nil {
state.Machines = []model.MachineRecord{}
}
if state.Volumes == nil {
state.Volumes = []model.VolumeRecord{}
}
if state.Snapshots == nil {
state.Snapshots = []model.SnapshotRecord{}
}
}
func normalizeOperations(operations *persistedOperations) {
if operations.Operations == nil {
operations.Operations = []model.OperationRecord{}
}
}