-
Notifications
You must be signed in to change notification settings - Fork 281
[shimV2] adds vpci device controller #2643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| //go:build windows | ||
|
|
||
| // Package vpci provides a controller for managing virtual PCI (vPCI) device | ||
| // assignments on a Utility VM (UVM). It handles assigning and removing | ||
| // PCI devices from the UVM via HCS modify calls. | ||
| // | ||
| // # Lifecycle | ||
| // | ||
| // [Controller] tracks active device assignments by VMBus GUID (device identifier | ||
| // within UVM) in an internal map. Each assignment is reference-counted to | ||
| // support shared access by multiple callers. | ||
| // | ||
| // - [Controller.Reserve] generates a unique VMBus GUID for a device and | ||
| // records the reservation. If the same device is already reserved, the | ||
| // existing GUID is returned. | ||
| // - [Controller.AddToVM] assigns a previously reserved device to the VM | ||
| // using the VMBus GUID returned by Reserve. If the device is already | ||
| // assigned, the reference count is incremented and the call succeeds | ||
| // without a second host-side assignment. | ||
| // - [Controller.RemoveFromVM] decrements the reference count for the device | ||
| // identified by VMBus GUID. When it reaches zero, the device is removed | ||
| // from the VM. It also handles cleanup for devices that were reserved | ||
| // but never assigned. | ||
| // | ||
| // # Invalid Devices | ||
| // | ||
| // If the host-side assignment succeeds but the guest-side notification fails, | ||
| // the device is marked invalid and further [Controller.AddToVM] calls for it | ||
| // are rejected. It remains tracked so that the caller can call | ||
| // [Controller.RemoveFromVM] to perform host-side cleanup. | ||
| // | ||
| // # Virtual Functions | ||
| // | ||
| // Each Virtual Function is assigned as an independent guest device with its own | ||
| // VMBus GUID. Multiple Virtual Functions on the same physical device are treated | ||
| // as separate devices in the guest. | ||
| // | ||
| // # Guest Requests | ||
| // | ||
| // On LCOW, assigning a vPCI device requires a guest-side notification so the | ||
| // GCS can wait for the required device paths to become available. | ||
| // WCOW does not require a guest request as part of device assignment. | ||
| package vpci |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| //go:build windows | ||
|
|
||
| package vpci | ||
|
|
||
| import ( | ||
| "context" | ||
|
|
||
| "github.com/Microsoft/go-winio/pkg/guid" | ||
|
|
||
| hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" | ||
| "github.com/Microsoft/hcsshim/internal/protocol/guestresource" | ||
| ) | ||
|
|
||
// Device identifies one vPCI function on the host that is to be assigned to
// the VM. The type is comparable, so a Device value can be used as a map key.
type Device struct {
	// DeviceInstanceID is the host-side device instance path of the vPCI
	// device.
	DeviceInstanceID string

	// VirtualFunctionIndex selects the SR-IOV virtual function of the device
	// that is assigned.
	VirtualFunctionIndex uint16
}
|
|
||
| // vmVPCI manages adding and removing vPCI devices for a Utility VM. | ||
| // Implemented by [vmmanager.UtilityVM]. | ||
| type vmVPCI interface { | ||
| // AddDevice adds a vPCI device identified by `vmBusGUID` to the Utility VM with the provided settings. | ||
| AddDevice(ctx context.Context, vmBusGUID string, settings hcsschema.VirtualPciDevice) error | ||
|
|
||
| // RemoveDevice removes the vPCI device identified by `vmBusGUID` from the Utility VM. | ||
| RemoveDevice(ctx context.Context, vmBusGUID string) error | ||
| } | ||
|
|
||
| // linuxGuestVPCI exposes vPCI device operations in the LCOW guest. | ||
| // Implemented by [guestmanager.Guest]. | ||
| type linuxGuestVPCI interface { | ||
| // AddVPCIDevice adds a vPCI device to the guest. | ||
| AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error | ||
| } | ||
|
|
||
| // ============================================================================== | ||
| // INTERNAL DATA STRUCTURES | ||
| // ============================================================================== | ||
|
|
||
| // deviceInfo records one vPCI device's assignment state and reference count. | ||
| type deviceInfo struct { | ||
| // device is the immutable host device identifier used to detect duplicate | ||
| // assignment requests. | ||
| device Device | ||
|
|
||
| // vmBusGUID identifies the vPCI device (backed by a VMBus channel) | ||
| // inside the UVM. | ||
| vmBusGUID guid.GUID | ||
|
|
||
| // refCount is the number of active callers sharing this device. | ||
| // Access must be guarded by [Controller.mu]. | ||
| refCount uint32 | ||
|
|
||
| // invalid indicates the host-side assignment succeeded but the guest-side | ||
| // assignment failed. Access must be guarded by [Controller.mu]. | ||
| invalid bool | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,217 @@ | ||
| //go:build windows | ||
|
|
||
| package vpci | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "sync" | ||
|
|
||
| "github.com/Microsoft/go-winio/pkg/guid" | ||
| "github.com/Microsoft/hcsshim/internal/logfields" | ||
| "github.com/sirupsen/logrus" | ||
|
|
||
| hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" | ||
| "github.com/Microsoft/hcsshim/internal/log" | ||
| ) | ||
|
|
||
| // Controller manages vPCI device assignments for a Utility VM. | ||
| type Controller struct { | ||
| mu sync.Mutex | ||
|
|
||
| // devices tracks currently assigned vPCI devices, keyed by VMBus GUID. | ||
| // Guarded by mu. | ||
| devices map[guid.GUID]*deviceInfo | ||
|
|
||
| // deviceToGUID maps a [Device] to its VMBus GUID for duplicate detection | ||
| // during [Controller.Reserve]. Guarded by mu. | ||
| deviceToGUID map[Device]guid.GUID | ||
|
|
||
| // vmVPCI performs host-side vPCI device add/remove on the VM. | ||
| vmVPCI vmVPCI | ||
|
|
||
| // linuxGuestVPCI performs guest-side vPCI device setup for LCOW. | ||
| linuxGuestVPCI linuxGuestVPCI | ||
| } | ||
|
|
||
| // New creates a ready-to-use [Controller]. | ||
| func New( | ||
| vmVPCI vmVPCI, | ||
| linuxGuestVPCI linuxGuestVPCI, | ||
| ) *Controller { | ||
| return &Controller{ | ||
| vmVPCI: vmVPCI, | ||
| linuxGuestVPCI: linuxGuestVPCI, | ||
| devices: make(map[guid.GUID]*deviceInfo), | ||
| deviceToGUID: make(map[Device]guid.GUID), | ||
| } | ||
| } | ||
|
|
||
| // Reserve generates a unique VMBus GUID for the given vPCI device and records | ||
| // the reservation. The returned GUID can later be passed to [Controller.AddToVM] | ||
| // to actually assign the device to the VM. | ||
| // | ||
| // If the same device (identified by DeviceInstanceID and VirtualFunctionIndex) has | ||
| // already been reserved, the existing GUID is returned. | ||
| // | ||
| // Each Virtual Function is assigned as an independent guest device with its own | ||
| // VMBus GUID. Multiple Virtual Functions on the same physical device are treated | ||
| // as separate devices. | ||
| func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, error) { | ||
| ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ | ||
| logfields.DeviceID: device.DeviceInstanceID, | ||
| logfields.VFIndex: device.VirtualFunctionIndex, | ||
| })) | ||
|
|
||
| c.mu.Lock() | ||
| defer c.mu.Unlock() | ||
|
|
||
| // If this device is already reserved, return the existing GUID. | ||
| if existingGUID, ok := c.deviceToGUID[device]; ok { | ||
| log.G(ctx).WithField(logfields.VMBusGUID, existingGUID).Debug("vPCI device already reserved, reusing existing GUID") | ||
| return existingGUID, nil | ||
| } | ||
|
|
||
| // Generate a new VMBus GUID for this device. | ||
| vmBusGUID, err := guid.NewV4() | ||
| if err != nil { | ||
| return guid.GUID{}, fmt.Errorf("generate vmbus guid for device %s: %w", device.DeviceInstanceID, err) | ||
| } | ||
|
|
||
| c.devices[vmBusGUID] = &deviceInfo{ | ||
| device: device, | ||
| vmBusGUID: vmBusGUID, | ||
| } | ||
| c.deviceToGUID[device] = vmBusGUID | ||
|
|
||
| log.G(ctx).WithField(logfields.VMBusGUID, vmBusGUID).Debug("reserved vPCI device with new VMBus GUID") | ||
| return vmBusGUID, nil | ||
| } | ||
|
|
||
| // AddToVM assigns a previously reserved vPCI device to the VM. | ||
| // The vmBusGUID must have been obtained from a prior call to [Controller.Reserve]. | ||
| // If the device is already assigned to the VM, the existing assignment is reused. | ||
| func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { | ||
| // Set vmBusGUID in logging context. | ||
| ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) | ||
|
|
||
| c.mu.Lock() | ||
| defer c.mu.Unlock() | ||
|
|
||
| dev, ok := c.devices[vmBusGUID] | ||
| if !ok { | ||
| return fmt.Errorf("no reservation found for vmBusGUID %s; call Reserve first", vmBusGUID) | ||
| } | ||
|
|
||
| // If a previous assignment left the device in an invalid state, | ||
| // reject new callers until the existing assignment is cleaned up. | ||
| if dev.invalid { | ||
| return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state", vmBusGUID) | ||
| } | ||
|
|
||
| ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ | ||
| logfields.DeviceID: dev.device.DeviceInstanceID, | ||
| logfields.VFIndex: dev.device.VirtualFunctionIndex, | ||
| })) | ||
|
|
||
| // If the device is already assigned to the VM (host-side call was already made), | ||
| // just bump the reference count and return. | ||
| if dev.refCount > 0 { | ||
| dev.refCount++ | ||
|
|
||
| log.G(ctx).Debug("vPCI device already assigned, reusing existing assignment") | ||
| return nil | ||
| } | ||
|
|
||
| // Device not yet attached to VM. | ||
| log.G(ctx).Debug("assigning vPCI device to VM") | ||
|
|
||
| // NUMA affinity is always propagated for assigned devices. | ||
| // This feature is available on WS2025 and later. | ||
| // Since the V2 shims only support WS2025 and later, this is set as true. | ||
| propagateAffinity := true | ||
|
|
||
| settings := hcsschema.VirtualPciDevice{ | ||
| Functions: []hcsschema.VirtualPciFunction{ | ||
| { | ||
| DeviceInstancePath: dev.device.DeviceInstanceID, | ||
| VirtualFunction: dev.device.VirtualFunctionIndex, | ||
| }, | ||
| }, | ||
| PropagateNumaAffinity: &propagateAffinity, | ||
| } | ||
|
|
||
| guidStr := vmBusGUID.String() | ||
|
|
||
| // Host-side: add the vPCI device to the VM. | ||
| if err := c.vmVPCI.AddDevice(ctx, guidStr, settings); err != nil { | ||
|
||
| return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) | ||
| } | ||
|
|
||
| // Update the ref count to indicate the device is now assigned to the VM. | ||
| dev.refCount++ | ||
|
||
|
|
||
| // Guest-side: device attach notification. | ||
| if err := c.waitGuestDeviceReady(ctx, guidStr); err != nil { | ||
| // Mark the device as invalid so the caller can call RemoveFromVM | ||
| // to clean up the host-side assignment. | ||
| dev.invalid = true | ||
| log.G(ctx).WithError(err).Error("guest-side vpci device setup failed, device marked invalid") | ||
| return fmt.Errorf("add guest vpci device with vmBusGUID %s to vm: %w", vmBusGUID, err) | ||
| } | ||
|
|
||
| log.G(ctx).Info("vPCI device assigned to VM") | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // RemoveFromVM removes a vPCI device from the VM. | ||
| // If the device is shared (reference count > 1), the reference count is | ||
| // decremented without actually removing the device from the VM. | ||
| func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) error { | ||
| c.mu.Lock() | ||
| defer c.mu.Unlock() | ||
|
|
||
| ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) | ||
|
|
||
| dev, ok := c.devices[vmBusGUID] | ||
| if !ok { | ||
| return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) | ||
| } | ||
|
|
||
| // Device was reserved but never added to the VM. Just clean up the reservation. | ||
| if dev.refCount == 0 { | ||
| log.G(ctx).Debug("vPCI device was reserved but never assigned, cleaning up reservation") | ||
|
|
||
| delete(c.devices, vmBusGUID) | ||
| delete(c.deviceToGUID, dev.device) | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // Decrement the ref count for the device. | ||
| dev.refCount-- | ||
|
||
| if dev.refCount > 0 { | ||
| log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") | ||
| return nil | ||
| } | ||
|
|
||
| // Last reference dropped (refCount == 0). Remove the device from the VM. | ||
| // This also covers devices marked invalid during AddToVM — the host-side | ||
| // assignment still needs to be cleaned up. | ||
|
|
||
| log.G(ctx).Debug("removing vPCI device from VM") | ||
|
|
||
| // Host-side: remove the vPCI device from the VM. | ||
| if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID.String()); err != nil { | ||
| // Restore the ref count since the removal failed. | ||
| dev.refCount++ | ||
| return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) | ||
| } | ||
|
|
||
| delete(c.devices, vmBusGUID) | ||
| delete(c.deviceToGUID, dev.device) | ||
|
|
||
| log.G(ctx).Info("vPCI device removed from VM") | ||
| return nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| //go:build windows && lcow | ||
|
|
||
| package vpci | ||
|
|
||
| import ( | ||
| "context" | ||
|
|
||
| "github.com/Microsoft/hcsshim/internal/protocol/guestresource" | ||
| ) | ||
|
|
||
| // waitGuestDeviceReady notifies the guest about the new device and blocks until | ||
| // the required sysfs/device paths are available before workloads use them. | ||
| func (c *Controller) waitGuestDeviceReady(ctx context.Context, vmBusGUID string) error { | ||
| return c.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ | ||
| VMBusGUID: vmBusGUID, | ||
| }) | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.