From efd71496f5d2dabe466335e6dad2deac6a202f8f Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Sun, 22 Mar 2026 01:27:33 +0530 Subject: [PATCH 1/6] [shimV2] adds vpci device controller This change adds the VPCI device controller which can be used to assign/unassign virtual PCI devices to the UVM. The same then can be assigned to the containers running in the UVM. Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/doc.go | 30 ++++ internal/controller/device/vpci/interface.go | 82 +++++++++ internal/controller/device/vpci/utils.go | 30 ++++ internal/controller/device/vpci/vpci.go | 179 +++++++++++++++++++ internal/controller/device/vpci/vpci_lcow.go | 17 ++ internal/controller/device/vpci/vpci_wcow.go | 11 ++ internal/devices/assigned_devices.go | 3 +- internal/uvm/virtual_device.go | 21 --- internal/vm/guestmanager/device_lcow.go | 12 -- internal/vm/vmmanager/pci.go | 11 -- 10 files changed, 351 insertions(+), 45 deletions(-) create mode 100644 internal/controller/device/vpci/doc.go create mode 100644 internal/controller/device/vpci/interface.go create mode 100644 internal/controller/device/vpci/vpci.go create mode 100644 internal/controller/device/vpci/vpci_lcow.go create mode 100644 internal/controller/device/vpci/vpci_wcow.go diff --git a/internal/controller/device/vpci/doc.go b/internal/controller/device/vpci/doc.go new file mode 100644 index 0000000000..e7aa197c50 --- /dev/null +++ b/internal/controller/device/vpci/doc.go @@ -0,0 +1,30 @@ +//go:build windows + +// Package vpci provides a controller for managing virtual PCI (vPCI) device +// assignments on a Utility VM (UVM). It handles assigning and removing +// PCI devices from the UVM via HCS modify calls. +// +// # Lifecycle +// +// [Manager] tracks active device assignments by VMBus GUID (device identifier +// within UVM) in an internal map. Each assignment is reference-counted to +// support shared access by multiple callers. +// +// - [Controller.AddToVM] assigns a device and records it in the map. +// If the same device is already assigned, the reference count is incremented. +// - [Controller.RemoveFromVM] decrements the reference count for the device +// identified by VMBus GUID. When it reaches zero, the device is removed +// from the VM. +// +// # Invalid Devices +// +// If the host-side assignment succeeds but the guest-side notification fails, +// the device is marked invalid. It remains tracked so that the caller can call +// [Controller.RemoveFromVM] to perform host-side cleanup. +// +// # Guest Requests +// +// On LCOW, assigning a vPCI device requires a guest-side notification so the +// GCS can wait for the required device paths to become available. +// WCOW does not require a guest request as part of device assignment. +package vpci diff --git a/internal/controller/device/vpci/interface.go b/internal/controller/device/vpci/interface.go new file mode 100644 index 0000000000..347143b486 --- /dev/null +++ b/internal/controller/device/vpci/interface.go @@ -0,0 +1,82 @@ +//go:build windows + +package vpci + +import ( + "context" + + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// Controller manages the lifecycle of vPCI devices assigned to a UVM. +type Controller interface { + // AddToVM assigns a vPCI device to the VM. If the same device is already + // assigned, the reference count is incremented. + AddToVM(ctx context.Context, opts *AddOptions) error + + // RemoveFromVM removes a vPCI device identified by vmBusGUID from the VM. + // If the device is shared (reference count > 1), the reference count is + // decremented without actually removing the device. + RemoveFromVM(ctx context.Context, vmBusGUID string) error +} + +// AddOptions holds the configuration required to assign a vPCI device to the VM. +type AddOptions struct { + // DeviceInstanceID is the host device instance path of the vPCI device. + DeviceInstanceID string + + // VirtualFunctionIndex is the SR-IOV virtual function index to assign. + VirtualFunctionIndex uint16 + + // VMBusGUID identifies the VirtualPci device (backed by a VMBus channel) + // inside the UVM. + VMBusGUID string +} + +// vmVPCI manages adding and removing vPCI devices for a Utility VM. +// Implemented by [vmmanager.UtilityVM]. +type vmVPCI interface { + // AddDevice adds a vPCI device identified by `vmBusGUID` to the Utility VM with the provided settings. + AddDevice(ctx context.Context, vmBusGUID string, settings hcsschema.VirtualPciDevice) error + + // RemoveDevice removes the vPCI device identified by `vmBusGUID` from the Utility VM. + RemoveDevice(ctx context.Context, vmBusGUID string) error +} + +// linuxGuestVPCI exposes vPCI device operations in the LCOW guest. +// Implemented by [guestmanager.Guest]. +type linuxGuestVPCI interface { + // AddVPCIDevice adds a vPCI device to the guest. + AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error +} + +// ============================================================================== +// INTERNAL DATA STRUCTURES +// ============================================================================== + +// deviceKey uniquely identifies a host vPCI device by its instance ID and +// virtual function index. +type deviceKey struct { + deviceInstanceID string + virtualFunctionIndex uint16 +} + +// deviceInfo records one vPCI device's assignment state and reference count. +type deviceInfo struct { + // key is the immutable host device identifier used to detect duplicate + // assignment requests. + key deviceKey + + // vmBusGUID identifies the VirtualPci device (backed by a VMBus channel) + // inside the UVM. + vmBusGUID string + + // refCount is the number of active callers sharing this device. + // Access must be guarded by [Manager.mu]. + refCount uint32 + + // invalid indicates the host-side assignment succeeded but the guest-side + // assignment failed. Access must be guarded by [Manager.mu]. + invalid bool +} diff --git a/internal/controller/device/vpci/utils.go b/internal/controller/device/vpci/utils.go index 044764057d..3d8395362a 100644 --- a/internal/controller/device/vpci/utils.go +++ b/internal/controller/device/vpci/utils.go @@ -3,6 +3,7 @@ package vpci import ( + "fmt" "path/filepath" "strconv" ) @@ -17,6 +18,16 @@ const ( DeviceIDType = "vpci-instance-id" ) +const ( + // vmBusChannelTypeGUIDFormatted is the well-known channel type GUID defined by + // VMBus for all assigned devices. + vmBusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" + + // assignedDeviceEnumerator is the VMBus enumerator prefix used in device + // instance IDs for assigned devices. + assignedDeviceEnumerator = "VMBUS" +) + // IsValidDeviceType returns true if the device type is valid i.e. supported by the runtime. func IsValidDeviceType(deviceType string) bool { return (deviceType == DeviceIDType) || @@ -36,3 +47,22 @@ func GetDeviceInfoFromPath(rawDevicePath string) (string, uint16) { // otherwise, just use default index and full device ID given return rawDevicePath, 0 } + +// GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBus channel +// device node created when a device is assigned to a UVM via vPCI. +// +// When a device is assigned to a UVM via VPCI support in HCS, a new VMBUS channel device node is +// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBUS +// channel device node. +// +// A device node's instance ID is an identifier that distinguishes that device from other devices +// on the system. The GUID of a VMBUS channel device node refers to that channel's unique +// identifier used internally by VMBUS and can be used to determine the VMBUS channel +// device node's instance ID. +// +// A VMBus channel device node's instance ID is in the form: +// +// "VMBUS\{channelTypeGUID}\{vmBusChannelGUID}" +func GetAssignedDeviceVMBUSInstanceID(vmBusChannelGUID string) string { + return fmt.Sprintf("%s\\%s\\{%s}", assignedDeviceEnumerator, vmBusChannelTypeGUIDFormatted, vmBusChannelGUID) +} diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go new file mode 100644 index 0000000000..1cda0446fb --- /dev/null +++ b/internal/controller/device/vpci/vpci.go @@ -0,0 +1,179 @@ +//go:build windows + +package vpci + +import ( + "context" + "fmt" + "sync" + + "github.com/sirupsen/logrus" + + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" +) + +// Manager is the concrete implementation of [Controller]. +type Manager struct { + mu sync.Mutex + + // devices tracks currently assigned vPCI devices, keyed by VMBus GUID. + // Guarded by mu. + devices map[string]*deviceInfo + + // keyToGUID maps a [deviceKey] to its VMBus GUID for duplicate detection + // during [Manager.AddToVM]. Guarded by mu. + keyToGUID map[deviceKey]string + + // vmVPCI performs host-side vPCI device add/remove on the VM. + vmVPCI vmVPCI + + // linuxGuestVPCI performs guest-side vPCI device setup for LCOW. + linuxGuestVPCI linuxGuestVPCI +} + +var _ Controller = (*Manager)(nil) + +// New creates a ready-to-use [Manager]. +func New( + vmVPCI vmVPCI, + linuxGuestVPCI linuxGuestVPCI, +) *Manager { + return &Manager{ + vmVPCI: vmVPCI, + linuxGuestVPCI: linuxGuestVPCI, + devices: make(map[string]*deviceInfo), + keyToGUID: make(map[deviceKey]string), + } +} + +// AddToVM assigns a vPCI device to the VM. +// If the same device is already assigned, the existing assignment is reused. +func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { + if opts.VMBusGUID == "" { + return fmt.Errorf("vmbus guid is required in add options") + } + + key := deviceKey{ + deviceInstanceID: opts.DeviceInstanceID, + virtualFunctionIndex: opts.VirtualFunctionIndex, + } + + // Set vmBusGUID in logging context. + ctx, _ = log.WithContext(ctx, logrus.WithField("vmBusGUID", opts.VMBusGUID)) + + m.mu.Lock() + defer m.mu.Unlock() + + // Check for an existing assignment with the same device key. + if existingGUID, ok := m.keyToGUID[key]; ok { + dev := m.devices[existingGUID] + + // If a previous assignment left the device in an invalid state + // reject new callers until the existing assignment is cleaned up. + if dev.invalid { + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", existingGUID) + } + + // Increase the refCount and return the existing device. + dev.refCount++ + + log.G(ctx).WithFields(logrus.Fields{ + "deviceInstanceID": key.deviceInstanceID, + "virtualFunctionIndex": key.virtualFunctionIndex, + "refCount": dev.refCount, + }).Debug("vPCI device already assigned, reusing existing assignment") + + return nil + } + + // Device not attached to VM. + // Build the VirtualPciDevice settings for HCS call. + + log.G(ctx).WithFields(logrus.Fields{ + "deviceInstanceID": key.deviceInstanceID, + "virtualFunctionIndex": key.virtualFunctionIndex, + }).Debug("assigning vPCI device to VM") + + // NUMA affinity is always propagated for assigned devices. + // This feature is available on WS2025 and later. + // Since the V2 shims only support WS2025 and later, this is set as true. + propagateAffinity := true + + settings := hcsschema.VirtualPciDevice{ + Functions: []hcsschema.VirtualPciFunction{ + { + DeviceInstancePath: opts.DeviceInstanceID, + VirtualFunction: opts.VirtualFunctionIndex, + }, + }, + PropagateNumaAffinity: &propagateAffinity, + } + + // Host-side: add the vPCI device to the VM. + if err := m.vmVPCI.AddDevice(ctx, opts.VMBusGUID, settings); err != nil { + return fmt.Errorf("add vpci device %s to vm: %w", opts.DeviceInstanceID, err) + } + + // Track early so RemoveFromVM can clean up even if the guest-side call fails. + dev := &deviceInfo{ + key: key, + vmBusGUID: opts.VMBusGUID, + refCount: 1, + } + m.devices[opts.VMBusGUID] = dev + m.keyToGUID[key] = opts.VMBusGUID + + // Guest-side: device attach notification. + if err := m.addGuestVPCIDevice(ctx, opts.VMBusGUID); err != nil { + // Mark the device as invalid so the caller can call RemoveFromVM + // to clean up the host-side assignment. + dev.invalid = true + log.G(ctx).WithError(err).Error("guest-side vpci device setup failed, device marked invalid") + return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", opts.VMBusGUID, err) + } + + log.G(ctx).Info("vPCI device assigned to VM") + + return nil +} + +// RemoveFromVM removes a vPCI device from the VM. +// If the device is shared (reference count > 1), the reference count is +// decremented without actually removing the device from the VM. +func (m *Manager) RemoveFromVM(ctx context.Context, vmBusGUID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + ctx, _ = log.WithContext(ctx, logrus.WithField("vmBusGUID", vmBusGUID)) + + dev, ok := m.devices[vmBusGUID] + if !ok { + return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) + } + + dev.refCount-- + if dev.refCount > 0 { + log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") + return nil + } + + // This path is reached when the device is no longer shared (refCount == 0) or + // had transitioned into an invalid state during AddToVM call. + + log.G(ctx).Debug("removing vPCI device from VM") + + // Host-side: remove the vPCI device from the VM. + if err := m.vmVPCI.RemoveDevice(ctx, vmBusGUID); err != nil { + // Restore the ref count since the removal failed. + dev.refCount++ + return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) + } + + delete(m.devices, vmBusGUID) + delete(m.keyToGUID, dev.key) + + log.G(ctx).Info("vPCI device removed from VM") + + return nil +} diff --git a/internal/controller/device/vpci/vpci_lcow.go b/internal/controller/device/vpci/vpci_lcow.go new file mode 100644 index 0000000000..e54a0f69cd --- /dev/null +++ b/internal/controller/device/vpci/vpci_lcow.go @@ -0,0 +1,17 @@ +//go:build windows && !wcow + +package vpci + +import ( + "context" + + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// addGuestVPCIDevice notifies the guest about the new device and blocks until +// the required sysfs/device paths are available before workloads use them. +func (m *Manager) addGuestVPCIDevice(ctx context.Context, vmBusGUID string) error { + return m.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ + VMBusGUID: vmBusGUID, + }) +} diff --git a/internal/controller/device/vpci/vpci_wcow.go b/internal/controller/device/vpci/vpci_wcow.go new file mode 100644 index 0000000000..7efcd33de6 --- /dev/null +++ b/internal/controller/device/vpci/vpci_wcow.go @@ -0,0 +1,11 @@ +//go:build windows && wcow + +package vpci + +import "context" + +// addGuestVPCIDevice is a no-op for Windows guests. WCOW does not require a +// guest-side notification as part of vPCI device assignment. +func (m *Manager) addGuestVPCIDevice(_ context.Context, _ string) error { + return nil +} diff --git a/internal/devices/assigned_devices.go b/internal/devices/assigned_devices.go index 97db77e0d6..3b751ad57a 100644 --- a/internal/devices/assigned_devices.go +++ b/internal/devices/assigned_devices.go @@ -10,6 +10,7 @@ import ( "strconv" "github.com/Microsoft/hcsshim/internal/cmd" + vpciCtrl "github.com/Microsoft/hcsshim/internal/controller/device/vpci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/pkg/errors" @@ -45,7 +46,7 @@ func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID string, if err != nil { return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) } - vmBusInstanceID := vm.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) + vmBusInstanceID := vpciCtrl.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) log.G(ctx).WithField("vmbus id", vmBusInstanceID).Info("vmbus instance ID") locationPaths, err = getChildrenDeviceLocationPaths(ctx, vm, vmBusInstanceID, deviceUtilPath) diff --git a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index 7a0518f926..1fc4078397 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -24,10 +24,6 @@ const ( VPCIDeviceIDType = "vpci-instance-id" ) -// this is the well known channel type GUID defined by VMBUS for all assigned devices -const vmbusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" -const assignedDeviceEnumerator = "VMBUS" - type VPCIDeviceID struct { deviceInstanceID string virtualFunctionIndex uint16 @@ -55,23 +51,6 @@ type VPCIDevice struct { refCount uint32 } -// GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBUS channel device node created. -// -// When a device is assigned to a UVM via VPCI support in HCS, a new VMBUS channel device node is -// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBUS -// channel device node. -// -// A device node's instance ID is an identifier that distinguishes that device from other devices -// on the system. The GUID of a VMBUS channel device node refers to that channel's unique -// identifier used internally by VMBUS and can be used to determine the VMBUS channel -// device node's instance ID. -// -// A VMBUS channel device node's instance ID is in the form: -// "VMBUS\vmbusChannelTypeGUIDFormatted\{vmBusChannelGUID}" -func (uvm *UtilityVM) GetAssignedDeviceVMBUSInstanceID(vmBusChannelGUID string) string { - return fmt.Sprintf("%s\\%s\\{%s}", assignedDeviceEnumerator, vmbusChannelTypeGUIDFormatted, vmBusChannelGUID) -} - // Release frees the resources of the corresponding vpci device func (vpci *VPCIDevice) Release(ctx context.Context) error { if err := vpci.vm.RemoveDevice(ctx, vpci.deviceInstanceID, vpci.virtualFunctionIndex); err != nil { diff --git a/internal/vm/guestmanager/device_lcow.go b/internal/vm/guestmanager/device_lcow.go index c6cece1fa0..61524a0ccc 100644 --- a/internal/vm/guestmanager/device_lcow.go +++ b/internal/vm/guestmanager/device_lcow.go @@ -11,18 +11,6 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -// LCOWDeviceManager exposes VPCI and VPMem device operations in the LCOW guest. -type LCOWDeviceManager interface { - // AddVPCIDevice adds a VPCI device to the guest. - AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error - // AddVPMemDevice adds a VPMem device to the guest. - AddVPMemDevice(ctx context.Context, settings guestresource.LCOWMappedVPMemDevice) error - // RemoveVPMemDevice removes a VPMem device from the guest. - RemoveVPMemDevice(ctx context.Context, settings guestresource.LCOWMappedVPMemDevice) error -} - -var _ LCOWDeviceManager = (*Guest)(nil) - // AddVPCIDevice adds a VPCI device in the guest. func (gm *Guest) AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error { request := &hcsschema.ModifySettingRequest{ diff --git a/internal/vm/vmmanager/pci.go b/internal/vm/vmmanager/pci.go index fbb72c4c04..fe5fc291e2 100644 --- a/internal/vm/vmmanager/pci.go +++ b/internal/vm/vmmanager/pci.go @@ -11,17 +11,6 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" ) -// PCIManager manages assiging pci devices to a Utility VM. This is Windows specific at the moment. -type PCIManager interface { - // AddDevice adds a pci device identified by `vmbusGUID` to the Utility VM with the provided settings. - AddDevice(ctx context.Context, vmbusGUID string, settings hcsschema.VirtualPciDevice) error - - // RemoveDevice removes the pci device identified by `vmbusGUID` from the Utility VM. - RemoveDevice(ctx context.Context, vmbusGUID string) error -} - -var _ PCIManager = (*UtilityVM)(nil) - func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID string, settings hcsschema.VirtualPciDevice) error { request := &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vmbusGUID), From 84723b3bc672572f32ae32eeed33c4128a814ac5 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Wed, 25 Mar 2026 22:11:14 +0530 Subject: [PATCH 2/6] review comments: 1 Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/vpci.go | 40 +++++++++++++++----- internal/controller/device/vpci/vpci_lcow.go | 4 +- internal/controller/device/vpci/vpci_wcow.go | 4 +- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go index 1cda0446fb..ffcafe0735 100644 --- a/internal/controller/device/vpci/vpci.go +++ b/internal/controller/device/vpci/vpci.go @@ -65,28 +65,48 @@ func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { m.mu.Lock() defer m.mu.Unlock() - // Check for an existing assignment with the same device key. - if existingGUID, ok := m.keyToGUID[key]; ok { - dev := m.devices[existingGUID] + // Check if the caller-provided GUID is already tracked. + if existingDev, ok := m.devices[opts.VMBusGUID]; ok { + // The GUID exists — verify the device settings match what was originally assigned. + // A mismatch means the caller is trying to reuse a GUID for a different device, + // which is a configuration error. + if existingDev.key != key { + return fmt.Errorf( + "vmBusGUID %s is already assigned to device (instanceID=%s, vfIndex=%d), but caller provided different settings (instanceID=%s, vfIndex=%d)", + opts.VMBusGUID, + existingDev.key.deviceInstanceID, existingDev.key.virtualFunctionIndex, + key.deviceInstanceID, key.virtualFunctionIndex, + ) + } - // If a previous assignment left the device in an invalid state + // If a previous assignment left the device in an invalid state, // reject new callers until the existing assignment is cleaned up. - if dev.invalid { - return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", existingGUID) + if existingDev.invalid { + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", opts.VMBusGUID) } - // Increase the refCount and return the existing device. - dev.refCount++ + // Same GUID, same device — reuse the existing assignment. + existingDev.refCount++ log.G(ctx).WithFields(logrus.Fields{ "deviceInstanceID": key.deviceInstanceID, "virtualFunctionIndex": key.virtualFunctionIndex, - "refCount": dev.refCount, + "refCount": existingDev.refCount, }).Debug("vPCI device already assigned, reusing existing assignment") return nil } + // The GUID is new — check whether the same device key is already assigned + // under a different GUID. This means the caller provided an inconsistent GUID. + if existingGUID, ok := m.keyToGUID[key]; ok { + return fmt.Errorf( + "vpci device (instanceID=%s, vfIndex=%d) is already assigned with vmBusGUID %s, but caller provided %s", + key.deviceInstanceID, key.virtualFunctionIndex, + existingGUID, opts.VMBusGUID, + ) + } + // Device not attached to VM. // Build the VirtualPciDevice settings for HCS call. @@ -125,7 +145,7 @@ func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { m.keyToGUID[key] = opts.VMBusGUID // Guest-side: device attach notification. - if err := m.addGuestVPCIDevice(ctx, opts.VMBusGUID); err != nil { + if err := m.waitGuestDeviceReady(ctx, opts.VMBusGUID); err != nil { // Mark the device as invalid so the caller can call RemoveFromVM // to clean up the host-side assignment. dev.invalid = true diff --git a/internal/controller/device/vpci/vpci_lcow.go b/internal/controller/device/vpci/vpci_lcow.go index e54a0f69cd..f2391e91ee 100644 --- a/internal/controller/device/vpci/vpci_lcow.go +++ b/internal/controller/device/vpci/vpci_lcow.go @@ -8,9 +8,9 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -// addGuestVPCIDevice notifies the guest about the new device and blocks until +// waitGuestDeviceReady notifies the guest about the new device and blocks until // the required sysfs/device paths are available before workloads use them. -func (m *Manager) addGuestVPCIDevice(ctx context.Context, vmBusGUID string) error { +func (m *Manager) waitGuestDeviceReady(ctx context.Context, vmBusGUID string) error { return m.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ VMBusGUID: vmBusGUID, }) diff --git a/internal/controller/device/vpci/vpci_wcow.go b/internal/controller/device/vpci/vpci_wcow.go index 7efcd33de6..96a72516ed 100644 --- a/internal/controller/device/vpci/vpci_wcow.go +++ b/internal/controller/device/vpci/vpci_wcow.go @@ -4,8 +4,8 @@ package vpci import "context" -// addGuestVPCIDevice is a no-op for Windows guests. WCOW does not require a +// waitGuestDeviceReady is a no-op for Windows guests. WCOW does not require a // guest-side notification as part of vPCI device assignment. -func (m *Manager) addGuestVPCIDevice(_ context.Context, _ string) error { +func (m *Manager) waitGuestDeviceReady(_ context.Context, _ string) error { return nil } From a2dc4e0248f2bb20a4c482f85efc5c53716da569 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Wed, 1 Apr 2026 04:19:25 +0530 Subject: [PATCH 3/6] review comments: 2 Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/doc.go | 20 +- .../device/vpci/{interface.go => types.go} | 41 +--- internal/controller/device/vpci/utils.go | 12 +- internal/controller/device/vpci/vpci.go | 206 ++++++++++-------- internal/controller/device/vpci/vpci_lcow.go | 6 +- .../device/vpci/vpci_unsupported.go | 13 ++ internal/controller/device/vpci/vpci_wcow.go | 2 +- internal/logfields/fields.go | 6 + 8 files changed, 168 insertions(+), 138 deletions(-) rename internal/controller/device/vpci/{interface.go => types.go} (57%) create mode 100644 internal/controller/device/vpci/vpci_unsupported.go diff --git a/internal/controller/device/vpci/doc.go b/internal/controller/device/vpci/doc.go index e7aa197c50..3c0dd04209 100644 --- a/internal/controller/device/vpci/doc.go +++ b/internal/controller/device/vpci/doc.go @@ -6,15 +6,21 @@ // // # Lifecycle // -// [Manager] tracks active device assignments by VMBus GUID (device identifier +// [Controller] tracks active device assignments by VMBus GUID (device identifier // within UVM) in an internal map. Each assignment is reference-counted to // support shared access by multiple callers. // -// - [Controller.AddToVM] assigns a device and records it in the map. -// If the same device is already assigned, the reference count is incremented. +// - [Controller.Reserve] generates a unique VMBus GUID for a device and +// records the reservation. If the same device is already reserved, the +// existing GUID is returned. +// - [Controller.AddToVM] assigns a previously reserved device to the VM +// using the VMBus GUID returned by Reserve. If the device is already +// assigned, the reference count is incremented and the call succeeds +// without a second host-side assignment. // - [Controller.RemoveFromVM] decrements the reference count for the device // identified by VMBus GUID. When it reaches zero, the device is removed -// from the VM. +// from the VM. It also handles cleanup for devices that were reserved +// but never assigned. // // # Invalid Devices // @@ -22,6 +28,12 @@ // the device is marked invalid. It remains tracked so that the caller can call // [Controller.RemoveFromVM] to perform host-side cleanup. // +// # Virtual Functions +// +// Each Virtual Function is assigned as an independent guest device with its own +// VMBus GUID. Multiple Virtual Functions on the same physical device are treated +// as separate devices in the guest. +// // # Guest Requests // // On LCOW, assigning a vPCI device requires a guest-side notification so the diff --git a/internal/controller/device/vpci/interface.go b/internal/controller/device/vpci/types.go similarity index 57% rename from internal/controller/device/vpci/interface.go rename to internal/controller/device/vpci/types.go index 347143b486..953d4b46f0 100644 --- a/internal/controller/device/vpci/interface.go +++ b/internal/controller/device/vpci/types.go @@ -5,33 +5,19 @@ package vpci import ( "context" + "github.com/Microsoft/go-winio/pkg/guid" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -// Controller manages the lifecycle of vPCI devices assigned to a UVM. -type Controller interface { - // AddToVM assigns a vPCI device to the VM. If the same device is already - // assigned, the reference count is incremented. - AddToVM(ctx context.Context, opts *AddOptions) error - - // RemoveFromVM removes a vPCI device identified by vmBusGUID from the VM. - // If the device is shared (reference count > 1), the reference count is - // decremented without actually removing the device. - RemoveFromVM(ctx context.Context, vmBusGUID string) error -} - -// AddOptions holds the configuration required to assign a vPCI device to the VM. -type AddOptions struct { +// Device holds the configuration required to assign a vPCI device to the VM. +type Device struct { // DeviceInstanceID is the host device instance path of the vPCI device. DeviceInstanceID string // VirtualFunctionIndex is the SR-IOV virtual function index to assign. VirtualFunctionIndex uint16 - - // VMBusGUID identifies the VirtualPci device (backed by a VMBus channel) - // inside the UVM. - VMBusGUID string } // vmVPCI manages adding and removing vPCI devices for a Utility VM. @@ -55,28 +41,21 @@ type linuxGuestVPCI interface { // INTERNAL DATA STRUCTURES // ============================================================================== -// deviceKey uniquely identifies a host vPCI device by its instance ID and -// virtual function index. -type deviceKey struct { - deviceInstanceID string - virtualFunctionIndex uint16 -} - // deviceInfo records one vPCI device's assignment state and reference count. type deviceInfo struct { - // key is the immutable host device identifier used to detect duplicate + // device is the immutable host device identifier used to detect duplicate // assignment requests. - key deviceKey + device Device - // vmBusGUID identifies the VirtualPci device (backed by a VMBus channel) + // vmBusGUID identifies the vPCI device (backed by a VMBus channel) // inside the UVM. - vmBusGUID string + vmBusGUID guid.GUID // refCount is the number of active callers sharing this device. - // Access must be guarded by [Manager.mu]. + // Access must be guarded by [Controller.mu]. refCount uint32 // invalid indicates the host-side assignment succeeded but the guest-side - // assignment failed. Access must be guarded by [Manager.mu]. + // assignment failed. Access must be guarded by [Controller.mu]. invalid bool } diff --git a/internal/controller/device/vpci/utils.go b/internal/controller/device/vpci/utils.go index 3d8395362a..0605d89b20 100644 --- a/internal/controller/device/vpci/utils.go +++ b/internal/controller/device/vpci/utils.go @@ -41,23 +41,23 @@ func GetDeviceInfoFromPath(rawDevicePath string) (string, uint16) { indexString := filepath.Base(rawDevicePath) index, err := strconv.ParseUint(indexString, 10, 16) if err == nil { - // we have a vf index + // We have a VF index. return filepath.Dir(rawDevicePath), uint16(index) } - // otherwise, just use default index and full device ID given + // Otherwise, just use default index and the full device ID as given. return rawDevicePath, 0 } // GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBus channel // device node created when a device is assigned to a UVM via vPCI. // -// When a device is assigned to a UVM via VPCI support in HCS, a new VMBUS channel device node is -// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBUS +// When a device is assigned to a UVM via vPCI support in HCS, a new VMBus channel device node is +// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBus // channel device node. // // A device node's instance ID is an identifier that distinguishes that device from other devices -// on the system. The GUID of a VMBUS channel device node refers to that channel's unique -// identifier used internally by VMBUS and can be used to determine the VMBUS channel +// on the system. The GUID of a VMBus channel device node refers to that channel's unique +// identifier used internally by VMBus and can be used to determine the VMBus channel // device node's instance ID. // // A VMBus channel device node's instance ID is in the form: diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go index ffcafe0735..87f919d0a4 100644 --- a/internal/controller/device/vpci/vpci.go +++ b/internal/controller/device/vpci/vpci.go @@ -7,23 +7,25 @@ import ( "fmt" "sync" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/sirupsen/logrus" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" ) -// Manager is the concrete implementation of [Controller]. -type Manager struct { +// Controller manages vPCI device assignments for a Utility VM. +type Controller struct { mu sync.Mutex // devices tracks currently assigned vPCI devices, keyed by VMBus GUID. // Guarded by mu. - devices map[string]*deviceInfo + devices map[guid.GUID]*deviceInfo - // keyToGUID maps a [deviceKey] to its VMBus GUID for duplicate detection - // during [Manager.AddToVM]. Guarded by mu. - keyToGUID map[deviceKey]string + // deviceToGUID maps a [Device] to its VMBus GUID for duplicate detection + // during [Controller.Reserve]. Guarded by mu. + deviceToGUID map[Device]guid.GUID // vmVPCI performs host-side vPCI device add/remove on the VM. vmVPCI vmVPCI @@ -32,88 +34,98 @@ type Manager struct { linuxGuestVPCI linuxGuestVPCI } -var _ Controller = (*Manager)(nil) - -// New creates a ready-to-use [Manager]. +// New creates a ready-to-use [Controller]. func New( vmVPCI vmVPCI, linuxGuestVPCI linuxGuestVPCI, -) *Manager { - return &Manager{ +) *Controller { + return &Controller{ vmVPCI: vmVPCI, linuxGuestVPCI: linuxGuestVPCI, - devices: make(map[string]*deviceInfo), - keyToGUID: make(map[deviceKey]string), + devices: make(map[guid.GUID]*deviceInfo), + deviceToGUID: make(map[Device]guid.GUID), } } -// AddToVM assigns a vPCI device to the VM. -// If the same device is already assigned, the existing assignment is reused. -func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { - if opts.VMBusGUID == "" { - return fmt.Errorf("vmbus guid is required in add options") +// Reserve generates a unique VMBus GUID for the given vPCI device and records +// the reservation. The returned GUID can later be passed to [Controller.AddToVM] +// to actually assign the device to the VM. +// +// If the same device (identified by DeviceInstanceID and VirtualFunctionIndex) has +// already been reserved, the existing GUID is returned. +// +// Each Virtual Function is assigned as an independent guest device with its own +// VMBus GUID. Multiple Virtual Functions on the same physical device are treated +// as separate devices. +func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, error) { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.DeviceID: device.DeviceInstanceID, + logfields.VFIndex: device.VirtualFunctionIndex, + })) + + c.mu.Lock() + defer c.mu.Unlock() + + // If this device is already reserved, return the existing GUID. + if existingGUID, ok := c.deviceToGUID[device]; ok { + log.G(ctx).WithField(logfields.VMBusGUID, existingGUID).Debug("vPCI device already reserved, reusing existing GUID") + return existingGUID, nil + } + + // Generate a new VMBus GUID for this device. + vmBusGUID, err := guid.NewV4() + if err != nil { + return guid.GUID{}, fmt.Errorf("generate vmbus guid for device %s: %w", device.DeviceInstanceID, err) } - key := deviceKey{ - deviceInstanceID: opts.DeviceInstanceID, - virtualFunctionIndex: opts.VirtualFunctionIndex, + c.devices[vmBusGUID] = &deviceInfo{ + device: device, + vmBusGUID: vmBusGUID, } + c.deviceToGUID[device] = vmBusGUID + log.G(ctx).WithField(logfields.VMBusGUID, vmBusGUID).Debug("reserved vPCI device with new VMBus GUID") + return vmBusGUID, nil +} + +// AddToVM assigns a previously reserved vPCI device to the VM. +// The vmBusGUID must have been obtained from a prior call to [Controller.Reserve]. +// If the device is already assigned to the VM, the existing assignment is reused. +func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { // Set vmBusGUID in logging context. - ctx, _ = log.WithContext(ctx, logrus.WithField("vmBusGUID", opts.VMBusGUID)) - - m.mu.Lock() - defer m.mu.Unlock() - - // Check if the caller-provided GUID is already tracked. - if existingDev, ok := m.devices[opts.VMBusGUID]; ok { - // The GUID exists — verify the device settings match what was originally assigned. - // A mismatch means the caller is trying to reuse a GUID for a different device, - // which is a configuration error. - if existingDev.key != key { - return fmt.Errorf( - "vmBusGUID %s is already assigned to device (instanceID=%s, vfIndex=%d), but caller provided different settings (instanceID=%s, vfIndex=%d)", - opts.VMBusGUID, - existingDev.key.deviceInstanceID, existingDev.key.virtualFunctionIndex, - key.deviceInstanceID, key.virtualFunctionIndex, - ) - } - - // If a previous assignment left the device in an invalid state, - // reject new callers until the existing assignment is cleaned up. - if existingDev.invalid { - return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", opts.VMBusGUID) - } - - // Same GUID, same device — reuse the existing assignment. - existingDev.refCount++ - - log.G(ctx).WithFields(logrus.Fields{ - "deviceInstanceID": key.deviceInstanceID, - "virtualFunctionIndex": key.virtualFunctionIndex, - "refCount": existingDev.refCount, - }).Debug("vPCI device already assigned, reusing existing assignment") + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) - return nil + c.mu.Lock() + defer c.mu.Unlock() + + dev, ok := c.devices[vmBusGUID] + if !ok { + return fmt.Errorf("no reservation found for vmBusGUID %s; call Reserve first", vmBusGUID) } - // The GUID is new — check whether the same device key is already assigned - // under a different GUID. This means the caller provided an inconsistent GUID. - if existingGUID, ok := m.keyToGUID[key]; ok { - return fmt.Errorf( - "vpci device (instanceID=%s, vfIndex=%d) is already assigned with vmBusGUID %s, but caller provided %s", - key.deviceInstanceID, key.virtualFunctionIndex, - existingGUID, opts.VMBusGUID, - ) + // If a previous assignment left the device in an invalid state, + // reject new callers until the existing assignment is cleaned up. + if dev.invalid { + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", vmBusGUID) } - // Device not attached to VM. - // Build the VirtualPciDevice settings for HCS call. + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.DeviceID: dev.device.DeviceInstanceID, + logfields.VFIndex: dev.device.VirtualFunctionIndex, + })) + + // If the device is already assigned to the VM (host-side call was already made), + // just bump the reference count and return. + if dev.refCount > 0 { + dev.refCount++ + + log.G(ctx).Debug("vPCI device already assigned, reusing existing assignment") + + return nil + } - log.G(ctx).WithFields(logrus.Fields{ - "deviceInstanceID": key.deviceInstanceID, - "virtualFunctionIndex": key.virtualFunctionIndex, - }).Debug("assigning vPCI device to VM") + // Device not yet attached to VM. + log.G(ctx).Debug("assigning vPCI device to VM") // NUMA affinity is always propagated for assigned devices. // This feature is available on WS2025 and later. @@ -123,34 +135,30 @@ func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { settings := hcsschema.VirtualPciDevice{ Functions: []hcsschema.VirtualPciFunction{ { - DeviceInstancePath: opts.DeviceInstanceID, - VirtualFunction: opts.VirtualFunctionIndex, + DeviceInstancePath: dev.device.DeviceInstanceID, + VirtualFunction: dev.device.VirtualFunctionIndex, }, }, PropagateNumaAffinity: &propagateAffinity, } + guidStr := vmBusGUID.String() + // Host-side: add the vPCI device to the VM. - if err := m.vmVPCI.AddDevice(ctx, opts.VMBusGUID, settings); err != nil { - return fmt.Errorf("add vpci device %s to vm: %w", opts.DeviceInstanceID, err) + if err := c.vmVPCI.AddDevice(ctx, guidStr, settings); err != nil { + return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) } - // Track early so RemoveFromVM can clean up even if the guest-side call fails. - dev := &deviceInfo{ - key: key, - vmBusGUID: opts.VMBusGUID, - refCount: 1, - } - m.devices[opts.VMBusGUID] = dev - m.keyToGUID[key] = opts.VMBusGUID + // Update the ref count to indicate the device is now assigned to the VM. + dev.refCount++ // Guest-side: device attach notification. - if err := m.waitGuestDeviceReady(ctx, opts.VMBusGUID); err != nil { + if err := c.waitGuestDeviceReady(ctx, guidStr); err != nil { // Mark the device as invalid so the caller can call RemoveFromVM // to clean up the host-side assignment. dev.invalid = true log.G(ctx).WithError(err).Error("guest-side vpci device setup failed, device marked invalid") - return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", opts.VMBusGUID, err) + return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", vmBusGUID, err) } log.G(ctx).Info("vPCI device assigned to VM") @@ -161,37 +169,49 @@ func (m *Manager) AddToVM(ctx context.Context, opts *AddOptions) (err error) { // RemoveFromVM removes a vPCI device from the VM. // If the device is shared (reference count > 1), the reference count is // decremented without actually removing the device from the VM. -func (m *Manager) RemoveFromVM(ctx context.Context, vmBusGUID string) error { - m.mu.Lock() - defer m.mu.Unlock() +func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) error { + c.mu.Lock() + defer c.mu.Unlock() - ctx, _ = log.WithContext(ctx, logrus.WithField("vmBusGUID", vmBusGUID)) + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) - dev, ok := m.devices[vmBusGUID] + dev, ok := c.devices[vmBusGUID] if !ok { return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) } + // Device was reserved but never added to the VM. Just clean up the reservation. + if dev.refCount == 0 { + log.G(ctx).Debug("vPCI device was reserved but never assigned, cleaning up reservation") + + delete(c.devices, vmBusGUID) + delete(c.deviceToGUID, dev.device) + + return nil + } + + // Decrement the ref count for the device. dev.refCount-- if dev.refCount > 0 { log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") return nil } - // This path is reached when the device is no longer shared (refCount == 0) or - // had transitioned into an invalid state during AddToVM call. + // Last reference dropped (refCount == 0). Remove the device from the VM. + // This also covers devices marked invalid during AddToVM — the host-side + // assignment still needs to be cleaned up. log.G(ctx).Debug("removing vPCI device from VM") // Host-side: remove the vPCI device from the VM. - if err := m.vmVPCI.RemoveDevice(ctx, vmBusGUID); err != nil { + if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID.String()); err != nil { // Restore the ref count since the removal failed. dev.refCount++ return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) } - delete(m.devices, vmBusGUID) - delete(m.keyToGUID, dev.key) + delete(c.devices, vmBusGUID) + delete(c.deviceToGUID, dev.device) log.G(ctx).Info("vPCI device removed from VM") diff --git a/internal/controller/device/vpci/vpci_lcow.go b/internal/controller/device/vpci/vpci_lcow.go index f2391e91ee..c3778eda53 100644 --- a/internal/controller/device/vpci/vpci_lcow.go +++ b/internal/controller/device/vpci/vpci_lcow.go @@ -1,4 +1,4 @@ -//go:build windows && !wcow +//go:build windows && lcow package vpci @@ -10,8 +10,8 @@ import ( // waitGuestDeviceReady notifies the guest about the new device and blocks until // the required sysfs/device paths are available before workloads use them. -func (m *Manager) waitGuestDeviceReady(ctx context.Context, vmBusGUID string) error { - return m.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ +func (c *Controller) waitGuestDeviceReady(ctx context.Context, vmBusGUID string) error { + return c.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ VMBusGUID: vmBusGUID, }) } diff --git a/internal/controller/device/vpci/vpci_unsupported.go b/internal/controller/device/vpci/vpci_unsupported.go new file mode 100644 index 0000000000..64ef3c0a9b --- /dev/null +++ b/internal/controller/device/vpci/vpci_unsupported.go @@ -0,0 +1,13 @@ +//go:build windows && !wcow && !lcow + +package vpci + +import ( + "context" + "fmt" +) + +// waitGuestDeviceReady is a not supported for unsupported guests. +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ string) error { + return fmt.Errorf("unsupported guest") +} diff --git a/internal/controller/device/vpci/vpci_wcow.go b/internal/controller/device/vpci/vpci_wcow.go index 96a72516ed..9e41484909 100644 --- a/internal/controller/device/vpci/vpci_wcow.go +++ b/internal/controller/device/vpci/vpci_wcow.go @@ -6,6 +6,6 @@ import "context" // waitGuestDeviceReady is a no-op for Windows guests. WCOW does not require a // guest-side notification as part of vPCI device assignment. -func (m *Manager) waitGuestDeviceReady(_ context.Context, _ string) error { +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ string) error { return nil } diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index dac5a708e5..3f077cd6f8 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -70,6 +70,12 @@ const ( ShimPid = "shim-pid" TaskPid = "task-pid" + // vpci device + + VMBusGUID = "vmBusGUID" + DeviceID = "deviceInstanceID" + VFIndex = "virtualFunctionIndex" + // sandbox NetNsPath = "net-ns-path" From 8149e234174dc11b2659e015e1bf886305bcd786 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Wed, 1 Apr 2026 04:40:21 +0530 Subject: [PATCH 4/6] review 3 Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/vpci.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go index 87f919d0a4..4a9454b4c0 100644 --- a/internal/controller/device/vpci/vpci.go +++ b/internal/controller/device/vpci/vpci.go @@ -120,7 +120,6 @@ func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { dev.refCount++ log.G(ctx).Debug("vPCI device already assigned, reusing existing assignment") - return nil } @@ -214,6 +213,5 @@ func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) erro delete(c.deviceToGUID, dev.device) log.G(ctx).Info("vPCI device removed from VM") - return nil } From 75e3cd6147e55890d9cab2c1a013f4987a651b21 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Wed, 1 Apr 2026 05:22:10 +0530 Subject: [PATCH 5/6] review 4 Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/doc.go | 5 +- .../controller/device/vpci/mocks/mock_vpci.go | 110 +++ internal/controller/device/vpci/state.go | 71 ++ internal/controller/device/vpci/types.go | 12 +- internal/controller/device/vpci/vpci.go | 142 ++-- internal/controller/device/vpci/vpci_lcow.go | 6 +- internal/controller/device/vpci/vpci_test.go | 643 ++++++++++++++++++ .../device/vpci/vpci_unsupported.go | 4 +- internal/controller/device/vpci/vpci_wcow.go | 8 +- internal/vm/vmmanager/pci.go | 6 +- 10 files changed, 928 insertions(+), 79 deletions(-) create mode 100644 internal/controller/device/vpci/mocks/mock_vpci.go create mode 100644 internal/controller/device/vpci/state.go create mode 100644 internal/controller/device/vpci/vpci_test.go diff --git a/internal/controller/device/vpci/doc.go b/internal/controller/device/vpci/doc.go index 3c0dd04209..3b6a49ce47 100644 --- a/internal/controller/device/vpci/doc.go +++ b/internal/controller/device/vpci/doc.go @@ -24,8 +24,9 @@ // // # Invalid Devices // -// If the host-side assignment succeeds but the guest-side notification fails, -// the device is marked invalid. It remains tracked so that the caller can call +// The device is marked invalid if the host-side assignment succeeds but the +// guest-side notification fails or if the host-side remove call fails. +// The device remains tracked as Invalid so that the caller can call // [Controller.RemoveFromVM] to perform host-side cleanup. // // # Virtual Functions diff --git a/internal/controller/device/vpci/mocks/mock_vpci.go b/internal/controller/device/vpci/mocks/mock_vpci.go new file mode 100644 index 0000000000..c541fcc4bc --- /dev/null +++ b/internal/controller/device/vpci/mocks/mock_vpci.go @@ -0,0 +1,110 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: types.go +// +// Generated by this command: +// +// mockgen -source types.go -package mocks -destination mocks/mock_vpci.go +// + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + reflect "reflect" + + guid "github.com/Microsoft/go-winio/pkg/guid" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + guestresource "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + gomock "go.uber.org/mock/gomock" +) + +// MockvmVPCI is a mock of vmVPCI interface. +type MockvmVPCI struct { + ctrl *gomock.Controller + recorder *MockvmVPCIMockRecorder + isgomock struct{} +} + +// MockvmVPCIMockRecorder is the mock recorder for MockvmVPCI. +type MockvmVPCIMockRecorder struct { + mock *MockvmVPCI +} + +// NewMockvmVPCI creates a new mock instance. +func NewMockvmVPCI(ctrl *gomock.Controller) *MockvmVPCI { + mock := &MockvmVPCI{ctrl: ctrl} + mock.recorder = &MockvmVPCIMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockvmVPCI) EXPECT() *MockvmVPCIMockRecorder { + return m.recorder +} + +// AddDevice mocks base method. +func (m *MockvmVPCI) AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddDevice", ctx, vmbusGUID, settings) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddDevice indicates an expected call of AddDevice. +func (mr *MockvmVPCIMockRecorder) AddDevice(ctx, vmbusGUID, settings any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddDevice", reflect.TypeOf((*MockvmVPCI)(nil).AddDevice), ctx, vmbusGUID, settings) +} + +// RemoveDevice mocks base method. +func (m *MockvmVPCI) RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemoveDevice", ctx, vmbusGUID) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemoveDevice indicates an expected call of RemoveDevice. +func (mr *MockvmVPCIMockRecorder) RemoveDevice(ctx, vmbusGUID any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveDevice", reflect.TypeOf((*MockvmVPCI)(nil).RemoveDevice), ctx, vmbusGUID) +} + +// MocklinuxGuestVPCI is a mock of linuxGuestVPCI interface. +type MocklinuxGuestVPCI struct { + ctrl *gomock.Controller + recorder *MocklinuxGuestVPCIMockRecorder + isgomock struct{} +} + +// MocklinuxGuestVPCIMockRecorder is the mock recorder for MocklinuxGuestVPCI. +type MocklinuxGuestVPCIMockRecorder struct { + mock *MocklinuxGuestVPCI +} + +// NewMocklinuxGuestVPCI creates a new mock instance. +func NewMocklinuxGuestVPCI(ctrl *gomock.Controller) *MocklinuxGuestVPCI { + mock := &MocklinuxGuestVPCI{ctrl: ctrl} + mock.recorder = &MocklinuxGuestVPCIMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MocklinuxGuestVPCI) EXPECT() *MocklinuxGuestVPCIMockRecorder { + return m.recorder +} + +// AddVPCIDevice mocks base method. +func (m *MocklinuxGuestVPCI) AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddVPCIDevice", ctx, settings) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddVPCIDevice indicates an expected call of AddVPCIDevice. +func (mr *MocklinuxGuestVPCIMockRecorder) AddVPCIDevice(ctx, settings any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddVPCIDevice", reflect.TypeOf((*MocklinuxGuestVPCI)(nil).AddVPCIDevice), ctx, settings) +} diff --git a/internal/controller/device/vpci/state.go b/internal/controller/device/vpci/state.go new file mode 100644 index 0000000000..cd32ea7e83 --- /dev/null +++ b/internal/controller/device/vpci/state.go @@ -0,0 +1,71 @@ +//go:build windows + +package vpci + +// State represents the current state of a vPCI device assignment lifecycle. +// +// The normal progression is: +// +// StateReserved → StateAssigned → StateRemoved +// +// A device transitions to [StateInvalid] when an operation partially succeeds +// and leaves the VM in an inconsistent state. This can happen in two ways: +// - [Controller.AddToVM]: host-side assignment succeeds but guest-side notification fails. +// - [Controller.RemoveFromVM]: the host-side remove call fails. +// +// A device in [StateInvalid] can only be cleaned up via [Controller.RemoveFromVM]. +// +// Full state-transition table: +// +// Current State │ Trigger │ Next State +// ──────────────┼───────────────────────────────────────────────────────┼────────────────── +// StateReserved │ AddToVM succeeds │ StateAssigned +// StateReserved │ RemoveFromVM called │ StateRemoved +// StateAssigned │ RemoveFromVM refCount drops to 0, succeeds │ StateRemoved +// StateAssigned │ AddToVM (host succeeded, guest-side fails) │ StateInvalid +// StateAssigned │ RemoveFromVM refCount drops to 0, host-side fails │ StateInvalid +// StateInvalid │ RemoveFromVM succeeds │ StateRemoved +// StateInvalid │ RemoveFromVM host-side fails │ StateInvalid +// StateRemoved │ (terminal — no further transitions) │ — +type State int32 + +const ( + // StateReserved indicates that a VMBus GUID has been generated and the + // device has been recorded in the Controller, but it has not yet been + // assigned to the VM via a host-side HCS modify call. + // This is the initial state set by [Controller.Reserve]. + StateReserved State = iota + + // StateAssigned indicates the device has been assigned to the VM + // (host-side HCS modify succeeded and guest-side notification succeeded). + // The reference count may be greater than one when multiple callers share + // the same device. + StateAssigned + + // StateInvalid indicates the device is in an inconsistent state due to a + // partial failure. + // The device must be cleaned up by calling [Controller.RemoveFromVM]. + StateInvalid + + // StateRemoved indicates the device has been fully removed from the VM + // and is no longer tracked by the Controller. + // This is a terminal state — once reached, no further state transitions + // are possible. + StateRemoved +) + +// String returns a human-readable string representation of the device State. +func (s State) String() string { + switch s { + case StateReserved: + return "Reserved" + case StateAssigned: + return "Assigned" + case StateInvalid: + return "Invalid" + case StateRemoved: + return "Removed" + default: + return "Unknown" + } +} diff --git a/internal/controller/device/vpci/types.go b/internal/controller/device/vpci/types.go index 953d4b46f0..ca2446971c 100644 --- a/internal/controller/device/vpci/types.go +++ b/internal/controller/device/vpci/types.go @@ -24,10 +24,10 @@ type Device struct { // Implemented by [vmmanager.UtilityVM]. type vmVPCI interface { // AddDevice adds a vPCI device identified by `vmBusGUID` to the Utility VM with the provided settings. - AddDevice(ctx context.Context, vmBusGUID string, settings hcsschema.VirtualPciDevice) error + AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error // RemoveDevice removes the vPCI device identified by `vmBusGUID` from the Utility VM. - RemoveDevice(ctx context.Context, vmBusGUID string) error + RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error } // linuxGuestVPCI exposes vPCI device operations in the LCOW guest. @@ -51,11 +51,11 @@ type deviceInfo struct { // inside the UVM. vmBusGUID guid.GUID + // state is the current lifecycle state of this device assignment. + // Access must be guarded by [Controller.mu]. + state State + // refCount is the number of active callers sharing this device. // Access must be guarded by [Controller.mu]. refCount uint32 - - // invalid indicates the host-side assignment succeeded but the guest-side - // assignment failed. Access must be guarded by [Controller.mu]. - invalid bool } diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go index 4a9454b4c0..a37eba388c 100644 --- a/internal/controller/device/vpci/vpci.go +++ b/internal/controller/device/vpci/vpci.go @@ -8,10 +8,10 @@ import ( "sync" "github.com/Microsoft/go-winio/pkg/guid" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/sirupsen/logrus" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" ) @@ -81,6 +81,7 @@ func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, err c.devices[vmBusGUID] = &deviceInfo{ device: device, vmBusGUID: vmBusGUID, + state: StateReserved, } c.deviceToGUID[device] = vmBusGUID @@ -91,6 +92,8 @@ func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, err // AddToVM assigns a previously reserved vPCI device to the VM. // The vmBusGUID must have been obtained from a prior call to [Controller.Reserve]. // If the device is already assigned to the VM, the existing assignment is reused. +// +// On failure, the caller should call [Controller.RemoveFromVM] to clean up any partial assignment state. func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { // Set vmBusGUID in logging context. ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) @@ -103,65 +106,68 @@ func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { return fmt.Errorf("no reservation found for vmBusGUID %s; call Reserve first", vmBusGUID) } - // If a previous assignment left the device in an invalid state, - // reject new callers until the existing assignment is cleaned up. - if dev.invalid { - return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", vmBusGUID) - } - ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ logfields.DeviceID: dev.device.DeviceInstanceID, logfields.VFIndex: dev.device.VirtualFunctionIndex, })) - // If the device is already assigned to the VM (host-side call was already made), - // just bump the reference count and return. - if dev.refCount > 0 { + switch dev.state { + case StateAssigned: + // If the device is already assigned to the VM (host-side call was already made), + // just bump the reference count and return. dev.refCount++ - log.G(ctx).Debug("vPCI device already assigned, reusing existing assignment") - return nil - } - - // Device not yet attached to VM. - log.G(ctx).Debug("assigning vPCI device to VM") - - // NUMA affinity is always propagated for assigned devices. - // This feature is available on WS2025 and later. - // Since the V2 shims only support WS2025 and later, this is set as true. - propagateAffinity := true - settings := hcsschema.VirtualPciDevice{ - Functions: []hcsschema.VirtualPciFunction{ - { - DeviceInstancePath: dev.device.DeviceInstanceID, - VirtualFunction: dev.device.VirtualFunctionIndex, + case StateReserved: + // Device not yet attached to VM. + log.G(ctx).Debug("assigning vPCI device to VM") + + // NUMA affinity is always propagated for assigned devices. + // This feature is available on WS2025 and later. + // Since the V2 shims only support WS2025 and later, this is set as true. + propagateAffinity := true + + settings := hcsschema.VirtualPciDevice{ + Functions: []hcsschema.VirtualPciFunction{ + { + DeviceInstancePath: dev.device.DeviceInstanceID, + VirtualFunction: dev.device.VirtualFunctionIndex, + }, }, - }, - PropagateNumaAffinity: &propagateAffinity, - } - - guidStr := vmBusGUID.String() + PropagateNumaAffinity: &propagateAffinity, + } + + // Host-side: add the vPCI device to the VM. + // On failure the device stays in StateReserved — the caller may + // retry AddToVM directly without any cleanup. + if err := c.vmVPCI.AddDevice(ctx, vmBusGUID, settings); err != nil { + return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) + } + + // Guest-side: device attach notification. + if err := c.waitGuestDeviceReady(ctx, vmBusGUID); err != nil { + // Mark the device as invalid so the caller can call RemoveFromVM + // to clean up the host-side assignment. + dev.state = StateInvalid + return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", vmBusGUID, err) + } + + // device add succeeded; bump the ref count. + dev.refCount++ + dev.state = StateAssigned - // Host-side: add the vPCI device to the VM. - if err := c.vmVPCI.AddDevice(ctx, guidStr, settings); err != nil { - return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) - } + log.G(ctx).Info("vPCI device assigned to VM") - // Update the ref count to indicate the device is now assigned to the VM. - dev.refCount++ + case StateInvalid: + // The device add failed in a previous attempt after the host-side assignment + // succeeded. Call RemoveFromVM to clean up the host-side assignment. + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", vmBusGUID) + default: + // This state cannot be reached. + return fmt.Errorf("vpci device with vmBusGUID %s is in an unknown state %d", vmBusGUID, dev.state) - // Guest-side: device attach notification. - if err := c.waitGuestDeviceReady(ctx, guidStr); err != nil { - // Mark the device as invalid so the caller can call RemoveFromVM - // to clean up the host-side assignment. - dev.invalid = true - log.G(ctx).WithError(err).Error("guest-side vpci device setup failed, device marked invalid") - return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", vmBusGUID, err) } - log.G(ctx).Info("vPCI device assigned to VM") - return nil } @@ -179,39 +185,47 @@ func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) erro return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) } - // Device was reserved but never added to the VM. Just clean up the reservation. - if dev.refCount == 0 { + // Devices that were reserved but never assigned to the VM have no host-side + // state to clean up — just drop the reservation and return early. + if dev.state == StateReserved { log.G(ctx).Debug("vPCI device was reserved but never assigned, cleaning up reservation") - - delete(c.devices, vmBusGUID) - delete(c.deviceToGUID, dev.device) - + c.untrack(vmBusGUID, dev) return nil } - // Decrement the ref count for the device. - dev.refCount-- + // Decrement the ref count. For StateInvalid devices the ref count is + // always 0 (AddToVM never completed successfully or RemoveFromVM failed), so this is + // effectively a no-op and the device proceeds straight to host-side removal. if dev.refCount > 0 { + dev.refCount-- + } + + // If the state is assigned and there are still active references, + // do not remove the device from the VM yet. + if dev.state == StateAssigned && dev.refCount > 0 { log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") return nil } - // Last reference dropped (refCount == 0). Remove the device from the VM. - // This also covers devices marked invalid during AddToVM — the host-side - // assignment still needs to be cleaned up. - log.G(ctx).Debug("removing vPCI device from VM") // Host-side: remove the vPCI device from the VM. - if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID.String()); err != nil { - // Restore the ref count since the removal failed. - dev.refCount++ + if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID); err != nil { + // The host-side remove failed; the device is still partially assigned. + // Mark it StateInvalid so that callers can retry via RemoveFromVM. + dev.state = StateInvalid return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) } - delete(c.devices, vmBusGUID) - delete(c.deviceToGUID, dev.device) - + c.untrack(vmBusGUID, dev) log.G(ctx).Info("vPCI device removed from VM") return nil } + +// untrack removes a device from the controller's tracking maps and marks it as +// [StateRemoved]. Must be called with c.mu held. +func (c *Controller) untrack(vmBusGUID guid.GUID, dev *deviceInfo) { + dev.state = StateRemoved + delete(c.devices, vmBusGUID) + delete(c.deviceToGUID, dev.device) +} diff --git a/internal/controller/device/vpci/vpci_lcow.go b/internal/controller/device/vpci/vpci_lcow.go index c3778eda53..762893da1e 100644 --- a/internal/controller/device/vpci/vpci_lcow.go +++ b/internal/controller/device/vpci/vpci_lcow.go @@ -5,13 +5,15 @@ package vpci import ( "context" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) // waitGuestDeviceReady notifies the guest about the new device and blocks until // the required sysfs/device paths are available before workloads use them. -func (c *Controller) waitGuestDeviceReady(ctx context.Context, vmBusGUID string) error { +func (c *Controller) waitGuestDeviceReady(ctx context.Context, vmBusGUID guid.GUID) error { return c.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ - VMBusGUID: vmBusGUID, + VMBusGUID: vmBusGUID.String(), }) } diff --git a/internal/controller/device/vpci/vpci_test.go b/internal/controller/device/vpci/vpci_test.go new file mode 100644 index 0000000000..a2f85133e7 --- /dev/null +++ b/internal/controller/device/vpci/vpci_test.go @@ -0,0 +1,643 @@ +//go:build windows && lcow + +package vpci + +import ( + "context" + "errors" + "testing" + + "github.com/Microsoft/go-winio/pkg/guid" + "go.uber.org/mock/gomock" + + "github.com/Microsoft/hcsshim/internal/controller/device/vpci/mocks" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +func newTestDevice() Device { + return Device{ + DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", + VirtualFunctionIndex: 0, + } +} + +var ( + errHostAdd = errors.New("host add failed") + errHostRemove = errors.New("host remove failed") + errGuestAdd = errors.New("guest add failed") +) + +// ───────────────────────────────────────────────────────────────────────────── +// Reserve tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestReserve_NewDevice verifies that reserving a device returns a non-zero GUID +// and that the device appears in both tracking maps. +func TestReserve_NewDevice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev := newTestDevice() + g, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if g == (guid.GUID{}) { + t.Fatal("expected non-zero GUID") + } + if _, ok := c.devices[g]; !ok { + t.Error("device not in c.devices after Reserve") + } + if storedGUID, ok := c.deviceToGUID[dev]; !ok || storedGUID != g { + t.Error("device not in c.deviceToGUID after Reserve") + } + if c.devices[g].state != StateReserved { + t.Errorf("expected StateReserved, got %v", c.devices[g].state) + } +} + +// TestReserve_SameDeviceTwice verifies idempotency: a second Reserve for the +// same device returns the exact same GUID without creating a new entry. +func TestReserve_SameDeviceTwice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev := newTestDevice() + g1, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("first Reserve: %v", err) + } + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("second Reserve: %v", err) + } + if g1 != g2 { + t.Errorf("expected same GUID, got %v vs %v", g1, g2) + } + if len(c.devices) != 1 { + t.Errorf("expected 1 device entry, got %d", len(c.devices)) + } +} + +// TestReserve_DifferentVF verifies that two VFs on the same physical device are +// treated as independent reservations. +func TestReserve_DifferentVF(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev0 := Device{DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", VirtualFunctionIndex: 0} + dev1 := Device{DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", VirtualFunctionIndex: 1} + + g0, _ := c.Reserve(ctx, dev0) + g1, _ := c.Reserve(ctx, dev1) + + if g0 == g1 { + t.Error("different VFs should get different GUIDs") + } + if len(c.devices) != 2 { + t.Errorf("expected 2 device entries, got %d", len(c.devices)) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// AddToVM tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestAddToVM_NoReservation verifies that AddToVM returns an error when called +// with an unknown GUID. +func TestAddToVM_NoReservation(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + unknownGUID, _ := guid.NewV4() + err := c.AddToVM(ctx, unknownGUID) + if err == nil { + t.Fatal("expected error for unknown GUID") + } +} + +// TestAddToVM_HappyPath verifies a normal Reserve → AddToVM flow. +func TestAddToVM_HappyPath(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + di := c.devices[g] + if di.state != StateAssigned { + t.Errorf("expected StateAssigned, got %v", di.state) + } + if di.refCount != 1 { + t.Errorf("expected refCount=1, got %d", di.refCount) + } +} + +// TestAddToVM_Idempotent verifies that calling AddToVM twice on the same device +// increments the refCount but does not make a second host-side call. +func TestAddToVM_Idempotent(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host and guest calls must happen exactly once. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + + _ = c.AddToVM(ctx, g) + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("second AddToVM: %v", err) + } + + di := c.devices[g] + if di.refCount != 2 { + t.Errorf("expected refCount=2, got %d", di.refCount) + } + if di.state != StateAssigned { + t.Errorf("expected StateAssigned, got %v", di.state) + } +} + +// TestAddToVM_HostFails verifies that a host-side failure leaves the device in +// StateReserved (no transition to StateInvalid) and does not bump the refCount. +func TestAddToVM_HostFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error") + } + + di := c.devices[g] + + if di.refCount != 0 { + t.Errorf("expected refCount=0 after host failure, got %d", di.refCount) + } + if di.state != StateReserved { + t.Errorf("expected StateReserved after host failure, got %v", di.state) + } +} + +// TestAddToVM_GuestFails verifies that a guest-side failure marks the device +// StateInvalid and does not bump the refCount. +func TestAddToVM_GuestFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error") + } + + di := c.devices[g] + if di.state != StateInvalid { + t.Errorf("expected StateInvalid after guest failure, got %v", di.state) + } + if di.refCount != 0 { + t.Errorf("expected refCount=0 after guest failure, got %d", di.refCount) + } +} + +// TestAddToVM_InvalidDevice verifies that AddToVM on a StateInvalid device +// returns an error and does not attempt a new host/guest call. +func TestAddToVM_InvalidDevice(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // First AddToVM: host succeeds, guest fails → StateInvalid. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g) + + // Second AddToVM: must NOT call host or guest again. + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error for StateInvalid device") + } +} + +// TestAddToVM_ReservedTwice_ThenAdd exercises the scenario where the same +// device is reserved (getting the same GUID back), then AddToVM is called. +// The reservation itself is idempotent, so AddToVM should be called only once +// for the underlying host/guest pair. +func TestAddToVM_ReservedTwice_ThenAdd(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + g2, _ := c.Reserve(ctx, dev) + + if g1 != g2 { + t.Fatal("expected same GUID from double Reserve") + } + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + + if err := c.AddToVM(ctx, g1); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// RemoveFromVM tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestRemoveFromVM_NoDevice verifies that removing an unknown GUID returns an error. +func TestRemoveFromVM_NoDevice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + unknownGUID, _ := guid.NewV4() + err := c.RemoveFromVM(ctx, unknownGUID) + if err == nil { + t.Fatal("expected error for unknown GUID") + } +} + +// TestRemoveFromVM_ReservedButNeverAdded verifies that removing a device that +// was reserved but never added cleans up the maps without touching the host. +func TestRemoveFromVM_ReservedButNeverAdded(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // RemoveDevice must NOT be called. + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after removal of reserved-only device") + } + if _, ok := c.deviceToGUID[dev]; ok { + t.Error("deviceToGUID still has entry after removal of reserved-only device") + } +} + +// TestRemoveFromVM_HappyPath verifies a full Reserve → AddToVM → RemoveFromVM cycle. +func TestRemoveFromVM_HappyPath(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after successful removal") + } +} + +// TestRemoveFromVM_RefCounting verifies that the device is only removed from +// the host when the last reference is dropped. +func TestRemoveFromVM_RefCounting(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + _ = c.AddToVM(ctx, g) + _ = c.AddToVM(ctx, g) // refCount → 2 + + // First remove: decrements ref to 1, no host call. + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("first remove error: %v", err) + } + if c.devices[g].refCount != 1 { + t.Errorf("expected refCount=1 after first remove, got %d", c.devices[g].refCount) + } + + // Second remove: drops to 0, triggers host remove. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil).Times(1) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("second remove error: %v", err) + } + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after last ref dropped") + } +} + +// TestRemoveFromVM_HostFails verifies that a failed host-side remove marks the +// device StateInvalid so it can be retried. +func TestRemoveFromVM_HostFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + // First remove attempt fails. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(errHostRemove) + err := c.RemoveFromVM(ctx, g) + if err == nil { + t.Fatal("expected error on host remove failure") + } + + di := c.devices[g] + if di.state != StateInvalid { + t.Errorf("expected StateInvalid after failed remove, got %v", di.state) + } + if di.refCount != 0 { + t.Errorf("expected refCount=0 after failed remove, got %d", di.refCount) + } +} + +// TestRemoveFromVM_HostFails_ThenRetry verifies that after a failed host remove +// (device is now StateInvalid with refCount=0), a retry via RemoveFromVM +// succeeds and cleans up the maps. +func TestRemoveFromVM_HostFails_ThenRetry(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + // First remove: host fails. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(errHostRemove) + _ = c.RemoveFromVM(ctx, g) + + // Retry: host succeeds. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("retry RemoveFromVM failed: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after successful retry removal") + } +} + +// TestRemoveFromVM_InvalidDevice_AfterGuestFail verifies that a device stuck in +// StateInvalid (due to guest failure in AddToVM) can be cleaned up via RemoveFromVM. +func TestRemoveFromVM_InvalidDevice_AfterGuestFail(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // AddToVM: host succeeds, guest fails. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g) + + // RemoveFromVM should issue a host remove and clean up. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("RemoveFromVM on invalid device failed: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after cleanup of invalid device") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: Reserve deduplication vs. state transitions +// ───────────────────────────────────────────────────────────────────────────── + +// TestReserve_AfterRemove verifies what happens when Reserve is called for a +// device that was previously removed. +// After RemoveFromVM the device is untracked from deviceToGUID, so Reserve +// should treat it as a brand-new device and return a fresh GUID. +func TestReserve_AfterRemove(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g1) + + vm.EXPECT().RemoveDevice(gomock.Any(), g1).Return(nil) + _ = c.RemoveFromVM(ctx, g1) + + // Reserve again after full removal: should get a new GUID. + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("Reserve after remove failed: %v", err) + } + if g2 == g1 { + t.Error("expected a new GUID after re-reserving a fully removed device") + } +} + +// TestReserve_AfterGuestFailure verifies what Reserve returns for a device that +// is currently StateInvalid (guest failed, host succeeded). +// Since the device is still in deviceToGUID, Reserve should return the SAME GUID. +func TestReserve_AfterGuestFailure(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g1) + + // Device is now StateInvalid and still in deviceToGUID. + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("Reserve after guest failure: %v", err) + } + if g2 != g1 { + t.Errorf("expected same GUID for invalid device on re-reserve, got different") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: RemoveFromVM on a StateRemoved device (already untracked) +// ───────────────────────────────────────────────────────────────────────────── + +// TestRemoveFromVM_AlreadyRemoved verifies that calling RemoveFromVM twice +// returns an error on the second call (device no longer in map). +func TestRemoveFromVM_AlreadyRemoved(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + _ = c.RemoveFromVM(ctx, g) + + // Second call: device is no longer tracked. + err := c.RemoveFromVM(ctx, g) + if err == nil { + t.Fatal("expected error when removing an already-removed device") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: AddToVM propagates correct HCS settings +// ───────────────────────────────────────────────────────────────────────────── + +// TestAddToVM_HCSSettings verifies that AddToVM passes the correct VirtualPciDevice +// settings including PropagateNumaAffinity=true and the device instance path. +func TestAddToVM_HCSSettings(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := Device{ + DeviceInstanceID: "PCI\\VEN_ABCD&DEV_1234\\99", + VirtualFunctionIndex: 3, + } + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT(). + AddDevice(gomock.Any(), g, gomock.AssignableToTypeOf(hcsschema.VirtualPciDevice{})). + DoAndReturn(func(_ context.Context, _ guid.GUID, settings hcsschema.VirtualPciDevice) error { + if settings.PropagateNumaAffinity == nil || !*settings.PropagateNumaAffinity { + t.Error("expected PropagateNumaAffinity=true") + } + if len(settings.Functions) != 1 { + t.Errorf("expected 1 function, got %d", len(settings.Functions)) + } + fn := settings.Functions[0] + if fn.DeviceInstancePath != dev.DeviceInstanceID { + t.Errorf("expected DeviceInstancePath=%q, got %q", dev.DeviceInstanceID, fn.DeviceInstancePath) + } + if fn.VirtualFunction != dev.VirtualFunctionIndex { + t.Errorf("expected VirtualFunction=%d, got %d", dev.VirtualFunctionIndex, fn.VirtualFunction) + } + return nil + }) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestAddToVM_GuestVMBusGUIDForwarded verifies that the correct vmBusGUID string +// is forwarded to the guest-side AddVPCIDevice call. +func TestAddToVM_GuestVMBusGUIDForwarded(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT(). + AddVPCIDevice(gomock.Any(), gomock.AssignableToTypeOf(guestresource.LCOWMappedVPCIDevice{})). + DoAndReturn(func(_ context.Context, req guestresource.LCOWMappedVPCIDevice) error { + if req.VMBusGUID != g.String() { + t.Errorf("expected VMBusGUID=%q, got %q", g.String(), req.VMBusGUID) + } + return nil + }) + + _ = c.AddToVM(ctx, g) +} diff --git a/internal/controller/device/vpci/vpci_unsupported.go b/internal/controller/device/vpci/vpci_unsupported.go index 64ef3c0a9b..31729ac881 100644 --- a/internal/controller/device/vpci/vpci_unsupported.go +++ b/internal/controller/device/vpci/vpci_unsupported.go @@ -5,9 +5,11 @@ package vpci import ( "context" "fmt" + + "github.com/Microsoft/go-winio/pkg/guid" ) // waitGuestDeviceReady is a not supported for unsupported guests. -func (c *Controller) waitGuestDeviceReady(_ context.Context, _ string) error { +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ guid.GUID) error { return fmt.Errorf("unsupported guest") } diff --git a/internal/controller/device/vpci/vpci_wcow.go b/internal/controller/device/vpci/vpci_wcow.go index 9e41484909..db7846a938 100644 --- a/internal/controller/device/vpci/vpci_wcow.go +++ b/internal/controller/device/vpci/vpci_wcow.go @@ -2,10 +2,14 @@ package vpci -import "context" +import ( + "context" + + "github.com/Microsoft/go-winio/pkg/guid" +) // waitGuestDeviceReady is a no-op for Windows guests. WCOW does not require a // guest-side notification as part of vPCI device assignment. -func (c *Controller) waitGuestDeviceReady(_ context.Context, _ string) error { +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ guid.GUID) error { return nil } diff --git a/internal/vm/vmmanager/pci.go b/internal/vm/vmmanager/pci.go index fe5fc291e2..f3d12a3ad3 100644 --- a/internal/vm/vmmanager/pci.go +++ b/internal/vm/vmmanager/pci.go @@ -6,12 +6,14 @@ import ( "context" "fmt" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" ) -func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID string, settings hcsschema.VirtualPciDevice) error { +func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error { request := &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vmbusGUID), RequestType: guestrequest.RequestTypeAdd, @@ -23,7 +25,7 @@ func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID string, settings return nil } -func (uvm *UtilityVM) RemoveDevice(ctx context.Context, vmbusGUID string) error { +func (uvm *UtilityVM) RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error { request := &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vmbusGUID), RequestType: guestrequest.RequestTypeRemove, From e3b8e8e388ecc59197fbb23e124838f6df295313 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Thu, 2 Apr 2026 03:27:16 +0530 Subject: [PATCH 6/6] review 5 Signed-off-by: Harsh Rawat --- internal/controller/device/vpci/doc.go | 48 +++++++-- internal/controller/device/vpci/state.go | 78 +++++++++----- internal/controller/device/vpci/vpci.go | 99 ++++++++++-------- internal/controller/device/vpci/vpci_test.go | 103 ++++++++++++++----- 4 files changed, 227 insertions(+), 101 deletions(-) diff --git a/internal/controller/device/vpci/doc.go b/internal/controller/device/vpci/doc.go index 3b6a49ce47..c00562aae9 100644 --- a/internal/controller/device/vpci/doc.go +++ b/internal/controller/device/vpci/doc.go @@ -10,17 +10,43 @@ // within UVM) in an internal map. Each assignment is reference-counted to // support shared access by multiple callers. // -// - [Controller.Reserve] generates a unique VMBus GUID for a device and -// records the reservation. If the same device is already reserved, the -// existing GUID is returned. -// - [Controller.AddToVM] assigns a previously reserved device to the VM -// using the VMBus GUID returned by Reserve. If the device is already -// assigned, the reference count is incremented and the call succeeds -// without a second host-side assignment. -// - [Controller.RemoveFromVM] decrements the reference count for the device -// identified by VMBus GUID. When it reaches zero, the device is removed -// from the VM. It also handles cleanup for devices that were reserved -// but never assigned. +// A device follows the state machine below. +// +// ┌─────────────────┐ +// │ StateReserved │ +// └────────┬────────┘ +// │ AddToVM host ok +// ▼ +// ┌─────────────────┐ AddToVM host fails ┌─────────────────┐ +// │ StateAssigned │──────────────────────────►│ StateRemoved │ +// └────────┬────────┘ └────────┬────────┘ +// ┌───────────┤ │ RemoveFromVM +// │ │ waitGuest ok ▼ +// │ ▼ (untracked) +// │ ┌─────────────────┐ +// │ │ StateReady │◄── AddToVM (refCount++) +// │ └────────┬────────┘ +// │ │ RemoveFromVM ok +// │ ▼ +// │ (untracked) +// │ +// │ ┌──────────────────────┐ +// └──waitGuest fail───────────────►│ StateAssignedInvalid │◄── RemoveFromVM host fails +// └──────────┬───────────┘ +// │ RemoveFromVM ok +// ▼ +// (untracked) +// +// - [Controller.Reserve] generates a unique VMBus GUID for a device and +// records the reservation. If the same device is already reserved, the +// existing GUID is returned. +// - [Controller.AddToVM] assigns a previously reserved device to the VM +// using the VMBus GUID returned by Reserve. If the device is already +// ready for use in the VM, the reference count is incremented. +// - [Controller.RemoveFromVM] decrements the reference count for the device +// identified by VMBus GUID. When it reaches zero, the device is removed +// from the VM. It also handles cleanup for devices that were reserved +// but never assigned, and for devices in an invalid state. // // # Invalid Devices // diff --git a/internal/controller/device/vpci/state.go b/internal/controller/device/vpci/state.go index cd32ea7e83..dcecdcbb52 100644 --- a/internal/controller/device/vpci/state.go +++ b/internal/controller/device/vpci/state.go @@ -6,27 +6,35 @@ package vpci // // The normal progression is: // -// StateReserved → StateAssigned → StateRemoved +// StateReserved → StateAssigned → StateReady → StateRemoved+untracked // -// A device transitions to [StateInvalid] when an operation partially succeeds +// [StateAssigned] is a transient state set within a single [Controller.AddToVM] call +// after the host-side HCS modify succeeds. [waitGuestDeviceReady] is then called; on +// success the device moves to [StateReady], on failure to [StateAssignedInvalid]. +// +// A device transitions to [StateAssignedInvalid] when an operation partially succeeds // and leaves the VM in an inconsistent state. This can happen in two ways: // - [Controller.AddToVM]: host-side assignment succeeds but guest-side notification fails. // - [Controller.RemoveFromVM]: the host-side remove call fails. // -// A device in [StateInvalid] can only be cleaned up via [Controller.RemoveFromVM]. +// A device in [StateAssignedInvalid] can only be cleaned up via [Controller.RemoveFromVM]. // // Full state-transition table: // -// Current State │ Trigger │ Next State -// ──────────────┼───────────────────────────────────────────────────────┼────────────────── -// StateReserved │ AddToVM succeeds │ StateAssigned -// StateReserved │ RemoveFromVM called │ StateRemoved -// StateAssigned │ RemoveFromVM refCount drops to 0, succeeds │ StateRemoved -// StateAssigned │ AddToVM (host succeeded, guest-side fails) │ StateInvalid -// StateAssigned │ RemoveFromVM refCount drops to 0, host-side fails │ StateInvalid -// StateInvalid │ RemoveFromVM succeeds │ StateRemoved -// StateInvalid │ RemoveFromVM host-side fails │ StateInvalid -// StateRemoved │ (terminal — no further transitions) │ — +// Current State │ Trigger │ Next State +// ─────────────────────┼────────────────────────────────────────────────────┼────────────────────── +// StateReserved │ AddToVM host-side succeeds │ StateAssigned +// StateReserved │ AddToVM host-side fails │ StateRemoved +// StateReserved │ RemoveFromVM called │ (untracked) +// StateAssigned │ waitGuestDeviceReady succeeds │ StateReady +// StateAssigned │ waitGuestDeviceReady fails │ StateAssignedInvalid +// StateReady │ AddToVM called (refCount++) │ StateReady +// StateReady │ RemoveFromVM refCount drops to 0, succeeds │ (untracked) +// StateReady │ RemoveFromVM refCount drops to 0, host-side fails │ StateAssignedInvalid +// StateAssignedInvalid │ RemoveFromVM succeeds │ (untracked) +// StateAssignedInvalid │ RemoveFromVM host-side fails │ StateAssignedInvalid +// StateRemoved │ AddToVM called │ error (call RemoveFromVM) +// StateRemoved │ RemoveFromVM called │ (untracked) type State int32 const ( @@ -36,21 +44,37 @@ const ( // This is the initial state set by [Controller.Reserve]. StateReserved State = iota - // StateAssigned indicates the device has been assigned to the VM - // (host-side HCS modify succeeded and guest-side notification succeeded). + // StateAssigned is a transient state that indicates the host-side HCS modify + // has succeeded but [waitGuestDeviceReady] has not yet been called/completed + // within a single [Controller.AddToVM] invocation. + // External callers should never observe this state. + StateAssigned + + // StateReady indicates the device has been fully assigned to the VM: + // the host-side HCS modify succeeded and the guest-side device is ready. // The reference count may be greater than one when multiple callers share // the same device. - StateAssigned + StateReady - // StateInvalid indicates the device is in an inconsistent state due to a - // partial failure. - // The device must be cleaned up by calling [Controller.RemoveFromVM]. - StateInvalid + // StateAssignedInvalid indicates the device is in an inconsistent state due to a + // partial failure. This state is reached in two ways: + // - [Controller.AddToVM]: the host-side assignment succeeded but the + // guest-side notification failed; the host-side assignment still exists + // but the guest-side device is not in a usable state. + // - [Controller.RemoveFromVM]: the host-side remove call failed; the + // host-side assignment still exists but the reference count has been + // decremented to zero. + // In either case the device must be cleaned up by calling [Controller.RemoveFromVM]. + StateAssignedInvalid - // StateRemoved indicates the device has been fully removed from the VM - // and is no longer tracked by the Controller. - // This is a terminal state — once reached, no further state transitions - // are possible. + // StateRemoved indicates that no host-side VM assignment exists for this device. + // This state is reached in two ways: + // - [Controller.AddToVM]: the host-side add call failed. The device is still + // tracked in the Controller and the caller must call [Controller.RemoveFromVM] + // to clean up the reservation. No further [Controller.AddToVM] calls are allowed. + // - [Controller.untrack]: set as a safety marker immediately before the device + // is deleted from the tracking maps. In this case the state is never externally + // observable — the device is gone from the map by the time the lock is released. StateRemoved ) @@ -61,8 +85,10 @@ func (s State) String() string { return "Reserved" case StateAssigned: return "Assigned" - case StateInvalid: - return "Invalid" + case StateReady: + return "Ready" + case StateAssignedInvalid: + return "AssignedInvalid" case StateRemoved: return "Removed" default: diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go index a37eba388c..aa8a675150 100644 --- a/internal/controller/device/vpci/vpci.go +++ b/internal/controller/device/vpci/vpci.go @@ -91,9 +91,9 @@ func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, err // AddToVM assigns a previously reserved vPCI device to the VM. // The vmBusGUID must have been obtained from a prior call to [Controller.Reserve]. -// If the device is already assigned to the VM, the existing assignment is reused. +// If the device is already ready for use in the VM, the reference count is incremented. // -// On failure, the caller should call [Controller.RemoveFromVM] to clean up any partial assignment state. +// On failure the caller should call [Controller.RemoveFromVM] to clean up. func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { // Set vmBusGUID in logging context. ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) @@ -112,14 +112,13 @@ func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { })) switch dev.state { - case StateAssigned: - // If the device is already assigned to the VM (host-side call was already made), - // just bump the reference count and return. + case StateReady: + // Device is already fully assigned and guest-ready; just bump the ref count. dev.refCount++ - log.G(ctx).Debug("vPCI device already assigned, reusing existing assignment") + log.G(ctx).Debug("vPCI device already ready, reusing existing assignment") case StateReserved: - // Device not yet attached to VM. + // Device not yet attached to VM — perform the host-side add. log.G(ctx).Debug("assigning vPCI device to VM") // NUMA affinity is always propagated for assigned devices. @@ -138,34 +137,45 @@ func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { } // Host-side: add the vPCI device to the VM. - // On failure the device stays in StateReserved — the caller may - // retry AddToVM directly without any cleanup. if err := c.vmVPCI.AddDevice(ctx, vmBusGUID, settings); err != nil { + // Set state to removed on failure. + // The caller can call RemoveFromVM to clean up the reservation. + dev.state = StateRemoved return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) } - // Guest-side: device attach notification. + // Host-side succeeded; mark as assigned (transient state) before + // waiting for the guest. + dev.state = StateAssigned + + // Guest-side: wait for the device to be ready inside the guest. if err := c.waitGuestDeviceReady(ctx, vmBusGUID); err != nil { - // Mark the device as invalid so the caller can call RemoveFromVM + // Host assignment is in place but guest is not ready. + // Mark StateAssignedInvalid so the caller can call RemoveFromVM // to clean up the host-side assignment. - dev.state = StateInvalid - return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", vmBusGUID, err) + dev.state = StateAssignedInvalid + return fmt.Errorf("wait for guest vpci device with vmBusGUID %s to become ready: %w", vmBusGUID, err) } - // device add succeeded; bump the ref count. + // Both host and guest succeeded; device is fully ready. dev.refCount++ - dev.state = StateAssigned + dev.state = StateReady log.G(ctx).Info("vPCI device assigned to VM") - case StateInvalid: + case StateAssignedInvalid: // The device add failed in a previous attempt after the host-side assignment - // succeeded. Call RemoveFromVM to clean up the host-side assignment. - return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", vmBusGUID) - default: - // This state cannot be reached. - return fmt.Errorf("vpci device with vmBusGUID %s is in an unknown state %d", vmBusGUID, dev.state) + // succeeded. Call RemoveFromVM to clean up the host-side assignment before retrying. + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state; call RemoveFromVM first", vmBusGUID) + case StateRemoved: + // The device failed to be added to the VM and hence was moved to state removed. + return fmt.Errorf("vpci device with vmBusGUID %s was removed due to a prior failure; call RemoveFromVM first", vmBusGUID) + + default: + // StateAssigned should never be observed by callers (it is a transient + // within-call state). + return fmt.Errorf("vpci device with vmBusGUID %s is in an unexpected state %s", vmBusGUID, dev.state) } return nil @@ -185,26 +195,32 @@ func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) erro return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) } - // Devices that were reserved but never assigned to the VM have no host-side - // state to clean up — just drop the reservation and return early. - if dev.state == StateReserved { - log.G(ctx).Debug("vPCI device was reserved but never assigned, cleaning up reservation") + switch dev.state { + case StateReserved, StateRemoved: + // Device was reserved but never assigned to the VM (or never assigned). + // No host-side state to clean up — just drop the tracking entry. + log.G(ctx).WithField("state", dev.state).Debug("vPCI device has no host-side assignment, cleaning up reservation") c.untrack(vmBusGUID, dev) return nil - } - // Decrement the ref count. For StateInvalid devices the ref count is - // always 0 (AddToVM never completed successfully or RemoveFromVM failed), so this is - // effectively a no-op and the device proceeds straight to host-side removal. - if dev.refCount > 0 { - dev.refCount-- - } + case StateReady: + // Decrement ref count; only remove from the host when the last reference is dropped. + if dev.refCount > 1 { + dev.refCount-- + log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") + return nil + } - // If the state is assigned and there are still active references, - // do not remove the device from the VM yet. - if dev.state == StateAssigned && dev.refCount > 0 { - log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") - return nil + // Last reference — fall through to host-side remove below. + dev.refCount = 0 + + case StateAssignedInvalid: + // Host-side assignment exists but device is in an inconsistent state. + // Proceed directly to host-side remove (refCount is always 0 here). + + default: + // StateAssigned is a transient within-call state and should not be seen here. + return fmt.Errorf("vpci device with vmBusGUID %s is in an unexpected state %s", vmBusGUID, dev.state) } log.G(ctx).Debug("removing vPCI device from VM") @@ -212,8 +228,8 @@ func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) erro // Host-side: remove the vPCI device from the VM. if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID); err != nil { // The host-side remove failed; the device is still partially assigned. - // Mark it StateInvalid so that callers can retry via RemoveFromVM. - dev.state = StateInvalid + // Mark it StateAssignedInvalid so that callers can retry via RemoveFromVM. + dev.state = StateAssignedInvalid return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) } @@ -222,8 +238,9 @@ func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) erro return nil } -// untrack removes a device from the controller's tracking maps and marks it as -// [StateRemoved]. Must be called with c.mu held. +// untrack removes a device from the controller's tracking maps and sets its +// state to [StateRemoved] as a safety marker. +// Must be called with c.mu held. func (c *Controller) untrack(vmBusGUID guid.GUID, dev *deviceInfo) { dev.state = StateRemoved delete(c.devices, vmBusGUID) diff --git a/internal/controller/device/vpci/vpci_test.go b/internal/controller/device/vpci/vpci_test.go index a2f85133e7..8010505510 100644 --- a/internal/controller/device/vpci/vpci_test.go +++ b/internal/controller/device/vpci/vpci_test.go @@ -140,8 +140,8 @@ func TestAddToVM_HappyPath(t *testing.T) { } di := c.devices[g] - if di.state != StateAssigned { - t.Errorf("expected StateAssigned, got %v", di.state) + if di.state != StateReady { + t.Errorf("expected StateReady, got %v", di.state) } if di.refCount != 1 { t.Errorf("expected refCount=1, got %d", di.refCount) @@ -173,13 +173,14 @@ func TestAddToVM_Idempotent(t *testing.T) { if di.refCount != 2 { t.Errorf("expected refCount=2, got %d", di.refCount) } - if di.state != StateAssigned { - t.Errorf("expected StateAssigned, got %v", di.state) + if di.state != StateReady { + t.Errorf("expected StateReady, got %v", di.state) } } -// TestAddToVM_HostFails verifies that a host-side failure leaves the device in -// StateReserved (no transition to StateInvalid) and does not bump the refCount. +// TestAddToVM_HostFails verifies that a host-side failure transitions the device +// to StateRemoved (still tracked in the map) without bumping the refCount. +// The device must be cleaned up via RemoveFromVM. func TestAddToVM_HostFails(t *testing.T) { ctrl := gomock.NewController(t) vm := mocks.NewMockvmVPCI(ctrl) @@ -197,18 +198,44 @@ func TestAddToVM_HostFails(t *testing.T) { t.Fatal("expected error") } - di := c.devices[g] - + // Device must still be tracked (StateRemoved, awaiting RemoveFromVM cleanup). + di, ok := c.devices[g] + if !ok { + t.Fatal("expected device to still be tracked after host failure") + } + if di.state != StateRemoved { + t.Errorf("expected StateRemoved after host failure, got %v", di.state) + } if di.refCount != 0 { t.Errorf("expected refCount=0 after host failure, got %d", di.refCount) } - if di.state != StateReserved { - t.Errorf("expected StateReserved after host failure, got %v", di.state) +} + +// TestAddToVM_StateRemoved verifies that calling AddToVM on a StateRemoved device +// returns an error and does not make any host or guest calls. +func TestAddToVM_StateRemoved(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host-side add fails → StateRemoved. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + _ = c.AddToVM(ctx, g) + + // Second AddToVM: must NOT call host or guest again. + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error for StateRemoved device") } } // TestAddToVM_GuestFails verifies that a guest-side failure marks the device -// StateInvalid and does not bump the refCount. +// StateAssignedInvalid and does not bump the refCount. func TestAddToVM_GuestFails(t *testing.T) { ctrl := gomock.NewController(t) vm := mocks.NewMockvmVPCI(ctrl) @@ -228,15 +255,15 @@ func TestAddToVM_GuestFails(t *testing.T) { } di := c.devices[g] - if di.state != StateInvalid { - t.Errorf("expected StateInvalid after guest failure, got %v", di.state) + if di.state != StateAssignedInvalid { + t.Errorf("expected StateAssignedInvalid after guest failure, got %v", di.state) } if di.refCount != 0 { t.Errorf("expected refCount=0 after guest failure, got %d", di.refCount) } } -// TestAddToVM_InvalidDevice verifies that AddToVM on a StateInvalid device +// TestAddToVM_InvalidDevice verifies that AddToVM on a StateAssignedInvalid device // returns an error and does not attempt a new host/guest call. func TestAddToVM_InvalidDevice(t *testing.T) { ctrl := gomock.NewController(t) @@ -248,7 +275,7 @@ func TestAddToVM_InvalidDevice(t *testing.T) { dev := newTestDevice() g, _ := c.Reserve(ctx, dev) - // First AddToVM: host succeeds, guest fails → StateInvalid. + // First AddToVM: host succeeds, guest fails → StateAssignedInvalid. vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) _ = c.AddToVM(ctx, g) @@ -256,7 +283,7 @@ func TestAddToVM_InvalidDevice(t *testing.T) { // Second AddToVM: must NOT call host or guest again. err := c.AddToVM(ctx, g) if err == nil { - t.Fatal("expected error for StateInvalid device") + t.Fatal("expected error for StateAssignedInvalid device") } } @@ -329,6 +356,36 @@ func TestRemoveFromVM_ReservedButNeverAdded(t *testing.T) { } } +// TestRemoveFromVM_AfterHostAddFails verifies that a device in StateRemoved (due to +// a failed host-side add in AddToVM) can be cleaned up via RemoveFromVM without +// making any host call. +func TestRemoveFromVM_AfterHostAddFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host-side add fails → StateRemoved. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + _ = c.AddToVM(ctx, g) + + // RemoveFromVM must NOT call RemoveDevice (no host-side state to clean up). + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after RemoveFromVM on StateRemoved device") + } + if _, ok := c.deviceToGUID[dev]; ok { + t.Error("deviceToGUID still has entry after RemoveFromVM on StateRemoved device") + } +} + // TestRemoveFromVM_HappyPath verifies a full Reserve → AddToVM → RemoveFromVM cycle. func TestRemoveFromVM_HappyPath(t *testing.T) { ctrl := gomock.NewController(t) @@ -390,7 +447,7 @@ func TestRemoveFromVM_RefCounting(t *testing.T) { } // TestRemoveFromVM_HostFails verifies that a failed host-side remove marks the -// device StateInvalid so it can be retried. +// device StateAssignedInvalid so it can be retried. func TestRemoveFromVM_HostFails(t *testing.T) { ctrl := gomock.NewController(t) vm := mocks.NewMockvmVPCI(ctrl) @@ -413,8 +470,8 @@ func TestRemoveFromVM_HostFails(t *testing.T) { } di := c.devices[g] - if di.state != StateInvalid { - t.Errorf("expected StateInvalid after failed remove, got %v", di.state) + if di.state != StateAssignedInvalid { + t.Errorf("expected StateAssignedInvalid after failed remove, got %v", di.state) } if di.refCount != 0 { t.Errorf("expected refCount=0 after failed remove, got %d", di.refCount) @@ -422,7 +479,7 @@ func TestRemoveFromVM_HostFails(t *testing.T) { } // TestRemoveFromVM_HostFails_ThenRetry verifies that after a failed host remove -// (device is now StateInvalid with refCount=0), a retry via RemoveFromVM +// (device is now StateAssignedInvalid with refCount=0), a retry via RemoveFromVM // succeeds and cleans up the maps. func TestRemoveFromVM_HostFails_ThenRetry(t *testing.T) { ctrl := gomock.NewController(t) @@ -454,7 +511,7 @@ func TestRemoveFromVM_HostFails_ThenRetry(t *testing.T) { } // TestRemoveFromVM_InvalidDevice_AfterGuestFail verifies that a device stuck in -// StateInvalid (due to guest failure in AddToVM) can be cleaned up via RemoveFromVM. +// StateAssignedInvalid (due to guest failure in AddToVM) can be cleaned up via RemoveFromVM. func TestRemoveFromVM_InvalidDevice_AfterGuestFail(t *testing.T) { ctrl := gomock.NewController(t) vm := mocks.NewMockvmVPCI(ctrl) @@ -517,7 +574,7 @@ func TestReserve_AfterRemove(t *testing.T) { } // TestReserve_AfterGuestFailure verifies what Reserve returns for a device that -// is currently StateInvalid (guest failed, host succeeded). +// is currently StateAssignedInvalid (guest failed, host succeeded). // Since the device is still in deviceToGUID, Reserve should return the SAME GUID. func TestReserve_AfterGuestFailure(t *testing.T) { ctrl := gomock.NewController(t) @@ -533,7 +590,7 @@ func TestReserve_AfterGuestFailure(t *testing.T) { guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) _ = c.AddToVM(ctx, g1) - // Device is now StateInvalid and still in deviceToGUID. + // Device is now StateAssignedInvalid and still in deviceToGUID. g2, err := c.Reserve(ctx, dev) if err != nil { t.Fatalf("Reserve after guest failure: %v", err)