diff --git a/pkg/network/deviceinfo/sriov.go b/pkg/network/deviceinfo/sriov.go
index 5c8997d4359f..7f3d5a77bad1 100644
--- a/pkg/network/deviceinfo/sriov.go
+++ b/pkg/network/deviceinfo/sriov.go
@@ -19,4 +19,7 @@
 
 package deviceinfo
 
-const SRIOVAliasPrefix = "sriov-"
+const (
+	SRIOVAliasPrefix    = "sriov-"
+	DraSRIOVAliasPrefix = "dra-sriov-"
+)
diff --git a/pkg/virt-config/feature-gates.go b/pkg/virt-config/feature-gates.go
index 589151209eac..c7b3f7f772ca 100644
--- a/pkg/virt-config/feature-gates.go
+++ b/pkg/virt-config/feature-gates.go
@@ -197,6 +197,11 @@ func (config *ClusterConfig) HostDevicesWithDRAEnabled() bool {
 	return config.isFeatureGateEnabled(featuregate.HostDevicesWithDRAGate)
 }
 
+// NetworkDevicesWithDRAEnabled reports whether the NetworkDevicesWithDRA feature gate is enabled.
+func (config *ClusterConfig) NetworkDevicesWithDRAEnabled() bool {
+	return config.isFeatureGateEnabled(featuregate.NetworkDevicesWithDRAGate)
+}
+
 func (config *ClusterConfig) ConfigurableHypervisorEnabled() bool {
 	return config.isFeatureGateEnabled(featuregate.ConfigurableHypervisor)
 }
diff --git a/pkg/virt-config/featuregate/active.go b/pkg/virt-config/featuregate/active.go
index 9224eb02b32e..ff92a59a2224 100644
--- a/pkg/virt-config/featuregate/active.go
+++ b/pkg/virt-config/featuregate/active.go
@@ -108,6 +108,12 @@ const (
 	// HostDevicesWithDRAGate allows users to create VMIs with DRA provisioned Host devices
 	HostDevicesWithDRAGate = "HostDevicesWithDRA"
 
+	// Owner: @alaypatel07
+	// Alpha: v1.6.0
+	//
+	// NetworkDevicesWithDRAGate allows users to create VMIs with DRA provisioned SR-IOV network devices
+	NetworkDevicesWithDRAGate = "NetworkDevicesWithDRA"
+
 	DecentralizedLiveMigration = "DecentralizedLiveMigration"
 
 	// Owner: sig-storage / @alromeros
@@ -198,6 +204,7 @@ func init() {
 	RegisterFeatureGate(FeatureGate{Name: VirtIOFSStorageVolumeGate, State: Alpha})
 	RegisterFeatureGate(FeatureGate{Name: GPUsWithDRAGate, State: Alpha})
 	RegisterFeatureGate(FeatureGate{Name: HostDevicesWithDRAGate, State: Alpha})
+	RegisterFeatureGate(FeatureGate{Name: NetworkDevicesWithDRAGate, State: Alpha})
 	RegisterFeatureGate(FeatureGate{Name: DecentralizedLiveMigration, State: Alpha})
 	RegisterFeatureGate(FeatureGate{Name: DeclarativeHotplugVolumesGate, State: Alpha})
 	RegisterFeatureGate(FeatureGate{Name: SecureExecution, State: Beta})
diff --git a/pkg/virt-controller/services/template.go b/pkg/virt-controller/services/template.go
index 89f7b7834d25..f8c482225640 100644
--- a/pkg/virt-controller/services/template.go
+++ b/pkg/virt-controller/services/template.go
@@ -311,6 +311,55 @@ func generateQemuTimeoutWithJitter(qemuTimeoutBaseSeconds int) string {
 	return fmt.Sprintf("%ds", timeout)
 }
 
+// renderNetworkResourceClaims makes the "compute" container reference the DRA
+// resource claims requested by the VMI networks.
+//
+// It does nothing unless the NetworkDevicesWithDRA feature gate is enabled. Only
+// networks that set both a claim name and a request name are considered, and a
+// claim/request pair the container already references is not added a second
+// time. Only container-level references are added; pod.Spec.ResourceClaims is
+// not modified here.
+func renderNetworkResourceClaims(vmi *v1.VirtualMachineInstance, pod *k8sv1.Pod, clusterConfig *virtconfig.ClusterConfig) {
+	if !clusterConfig.NetworkDevicesWithDRAEnabled() {
+		return
+	}
+
+	var compute *k8sv1.Container
+	for i := range pod.Spec.Containers {
+		if pod.Spec.Containers[i].Name == "compute" {
+			compute = &pod.Spec.Containers[i]
+			break
+		}
+	}
+	if compute == nil {
+		return
+	}
+
+	// Remember what the container already references so that networks sharing
+	// a claim/request pair do not produce duplicate entries.
+	type claimRef struct{ name, request string }
+	seen := make(map[claimRef]struct{}, len(compute.Resources.Claims))
+	for _, claim := range compute.Resources.Claims {
+		seen[claimRef{name: claim.Name, request: claim.Request}] = struct{}{}
+	}
+
+	for _, network := range vmi.Spec.Networks {
+		rc := network.ResourceClaim
+		if rc == nil || rc.ClaimName == nil || rc.RequestName == nil {
+			continue
+		}
+		ref := claimRef{name: *rc.ClaimName, request: *rc.RequestName}
+		if _, ok := seen[ref]; ok {
+			continue
+		}
+		seen[ref] = struct{}{}
+		compute.Resources.Claims = append(compute.Resources.Claims, k8sv1.ResourceClaim{
+			Name:    ref.name,
+			Request: ref.request,
+		})
+	}
+}
+
 func computePodSecurityContext(vmi *v1.VirtualMachineInstance, seccomp *k8sv1.SeccompProfile) *k8sv1.PodSecurityContext {
 	psc := &k8sv1.PodSecurityContext{}
 
@@ -696,6 +745,9 @@ func (t *TemplateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, i
 
 	pod.Spec.Volumes = append(pod.Spec.Volumes, sidecarVolumes...)
 
+	// Add network resource claims to pod
+	renderNetworkResourceClaims(vmi, &pod, t.clusterConfig)
+
 	return &pod, nil
 }
 
diff --git a/pkg/virt-controller/watch/dra/dra.go b/pkg/virt-controller/watch/dra/dra.go
index c89b967b09f9..29ccdbd354b6 100644
--- a/pkg/virt-controller/watch/dra/dra.go
+++ b/pkg/virt-controller/watch/dra/dra.go
@@ -430,6 +430,23 @@ func (c *DRAStatusController) updateStatus(logger *log.FilteredLogger, vmi *v1.V
 		}
 	}
 
+	// Sync network devices (they use hostDeviceStatuses array)
+	if c.clusterConfig.NetworkDevicesWithDRAEnabled() {
+		networkDeviceInfo, err := c.getNetworkDevicesFromVMISpec(vmi)
+		if err != nil {
+			return err
+		}
+
+		if len(networkDeviceInfo) > 0 {
+			networkDeviceStatuses, err := c.getNetworkDeviceStatuses(networkDeviceInfo, pod)
+			if err != nil {
+				return err
+			}
+			// Append network statuses to hostDeviceStatuses
+			hostDeviceStatuses = append(hostDeviceStatuses, networkDeviceStatuses...)
+		}
+	}
+
 	newDeviceStatus := &v1.DeviceStatus{}
 	if gpuStatuses != nil {
 		newDeviceStatus.GPUStatuses = gpuStatuses
@@ -709,3 +726,79 @@ func (c *DRAStatusController) getHostDeviceStatus(hostDeviceInfo DeviceInfo, pod
 
 	return hostDeviceStatus, nil
 }
+
+// getNetworkDevicesFromVMISpec returns a DeviceInfo for every VMI network that
+// sets both a resource claim name and a request name; other networks are
+// skipped. The returned error is currently always nil.
+func (c *DRAStatusController) getNetworkDevicesFromVMISpec(vmi *v1.VirtualMachineInstance) ([]DeviceInfo, error) {
+	var networkDevices []DeviceInfo
+	for _, network := range vmi.Spec.Networks {
+		if network.ResourceClaim == nil {
+			continue
+		}
+		if network.ResourceClaim.ClaimName == nil || network.ResourceClaim.RequestName == nil {
+			continue
+		}
+		networkDevices = append(networkDevices, DeviceInfo{
+			VMISpecClaimName:   *network.ResourceClaim.ClaimName,
+			VMISpecRequestName: *network.ResourceClaim.RequestName,
+			DeviceStatusInfo: &v1.DeviceStatusInfo{
+				Name:                      network.Name,
+				DeviceResourceClaimStatus: nil,
+			},
+		})
+	}
+	return networkDevices, nil
+}
+
+// getNetworkDeviceStatuses resolves the status of each given network device,
+// returning the first error encountered.
+func (c *DRAStatusController) getNetworkDeviceStatuses(networkDeviceInfos []DeviceInfo, pod *k8sv1.Pod) ([]v1.DeviceStatusInfo, error) {
+	statuses := make([]v1.DeviceStatusInfo, 0, len(networkDeviceInfos))
+	for _, info := range networkDeviceInfos {
+		st, err := c.getNetworkDeviceStatus(info, pod)
+		if err != nil {
+			return nil, err
+		}
+		statuses = append(statuses, st)
+	}
+	return statuses, nil
+}
+
+// getNetworkDeviceStatus builds the status entry of a single network device.
+// The allocated device name and its PCI address are only filled in when
+// getResourceClaimNameForDevice yields a claim name and getAllocatedDevice
+// yields a device.
+func (c *DRAStatusController) getNetworkDeviceStatus(networkDeviceInfo DeviceInfo, pod *k8sv1.Pod) (v1.DeviceStatusInfo, error) {
+	networkDeviceStatus := v1.DeviceStatusInfo{
+		Name: networkDeviceInfo.Name,
+		DeviceResourceClaimStatus: &v1.DeviceResourceClaimStatus{
+			ResourceClaimName: getResourceClaimNameForDevice(networkDeviceInfo.VMISpecClaimName, pod),
+		},
+	}
+
+	if networkDeviceStatus.DeviceResourceClaimStatus.ResourceClaimName == nil {
+		return networkDeviceStatus, nil
+	}
+
+	device, err := c.getAllocatedDevice(pod.Namespace, *networkDeviceStatus.DeviceResourceClaimStatus.ResourceClaimName, networkDeviceInfo.VMISpecRequestName)
+	if err != nil {
+		return networkDeviceStatus, err
+	}
+	if device == nil {
+		return networkDeviceStatus, nil
+	}
+
+	networkDeviceStatus.DeviceResourceClaimStatus.Name = &device.Device
+	pciAddress, _, err := c.getDeviceAttributes(pod.Spec.NodeName, device.Device, device.Driver)
+	if err != nil {
+		return networkDeviceStatus, err
+	}
+	attrs := v1.DeviceAttribute{}
+	if pciAddress != "" {
+		attrs.PCIAddress = &pciAddress
+	}
+	networkDeviceStatus.DeviceResourceClaimStatus.Attributes = &attrs
+
+	return networkDeviceStatus, nil
+}
diff --git a/pkg/virt-launcher/virtwrap/device/hostdevice/sriov/hostdev.go b/pkg/virt-launcher/virtwrap/device/hostdevice/sriov/hostdev.go
index 061fe84a16bc..5ac3d74f7c00 100644
--- a/pkg/virt-launcher/virtwrap/device/hostdevice/sriov/hostdev.go
+++ b/pkg/virt-launcher/virtwrap/device/hostdevice/sriov/hostdev.go
@@ -41,27 +41,56 @@ import (
 )
 
 func CreateHostDevices(vmi *v1.VirtualMachineInstance) ([]api.HostDevice, error) {
-	SRIOVInterfaces := vmispec.FilterInterfacesSpec(vmi.Spec.Domain.Devices.Interfaces, func(iface v1.Interface) bool {
+	multusHostDevs, err := createMultusSRIOVHostDevices(vmi)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create Multus SR-IOV host devices: %w", err)
+	}
+
+	draHostDevs, err := createDRASRIOVHostDevices(vmi)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create DRA SR-IOV host devices: %w", err)
+	}
+
+	// Return an empty, non-nil slice when there is nothing to attach, as this
+	// function did before DRA support was added.
+	hostDevices := make([]api.HostDevice, 0, len(multusHostDevs)+len(draHostDevs))
+	hostDevices = append(hostDevices, multusHostDevs...)
+	hostDevices = append(hostDevices, draHostDevs...)
+
+	return hostDevices, nil
+}
+
+// createMultusSRIOVHostDevices creates SR-IOV host devices for Multus-based networks
+func createMultusSRIOVHostDevices(vmi *v1.VirtualMachineInstance) ([]api.HostDevice, error) {
+	// Filter Multus-based SR-IOV interfaces
+	multusInterfaces := vmispec.FilterInterfacesSpec(vmi.Spec.Domain.Devices.Interfaces, func(iface v1.Interface) bool {
 		if iface.SRIOV == nil {
 			return false
 		}
+		network := findNetworkByName(vmi, iface.Name)
+		if network == nil || network.Multus == nil {
+			return false
+		}
+		// Only process Multus interfaces that have InfoSourceMultusStatus
 		ifaceStatus := vmispec.LookupInterfaceStatusByName(vmi.Status.Interfaces, iface.Name)
 		return ifaceStatus != nil && vmispec.ContainsInfoSource(ifaceStatus.InfoSource, vmispec.InfoSourceMultusStatus)
 	})
-	if len(SRIOVInterfaces) == 0 {
+
+	if len(multusInterfaces) == 0 {
 		return []api.HostDevice{}, nil
 	}
+
 	netStatusPath := path.Join(downwardapi.MountPath, downwardapi.NetworkInfoVolumePath)
 	pciAddressPoolWithNetworkStatus, err := newPCIAddressPoolWithNetworkStatusFromFile(netStatusPath)
 	if err != nil {
-		return nil, fmt.Errorf("failed to create SR-IOV hostdevices: %v", err)
+		return nil, fmt.Errorf("failed to create Multus SR-IOV hostdevices: %w", err)
 	}
 	if pciAddressPoolWithNetworkStatus.Len() == 0 {
-		log.Log.Object(vmi).Warningf("found no SR-IOV networks to PCI-Address mapping.")
-		return nil, fmt.Errorf("found no SR-IOV networks to PCI-Address mapping")
+		log.Log.Object(vmi).Warningf("found no Multus SR-IOV networks to PCI-Address mapping.")
+		return nil, fmt.Errorf("found no Multus SR-IOV networks to PCI-Address mapping")
 	}
 
-	return CreateHostDevicesFromIfacesAndPool(SRIOVInterfaces, pciAddressPoolWithNetworkStatus)
+	return CreateHostDevicesFromIfacesAndPool(multusInterfaces, pciAddressPoolWithNetworkStatus)
 }
 
 // newPCIAddressPoolWithNetworkStatusFromFile polls the given file path until populated, then uses it to create the
@@ -156,3 +185,172 @@ func GetHostDevicesToAttach(vmi *v1.VirtualMachineInstance, domainSpec *api.Doma
 
 	return sriovHostDevicesToAttach, nil
 }
+
+// createDRASRIOVHostDevices creates host devices for DRA-based SR-IOV networks
+func createDRASRIOVHostDevices(vmi *v1.VirtualMachineInstance) ([]api.HostDevice, error) {
+	// Filter DRA-based SR-IOV interfaces
+	draInterfaces := vmispec.FilterInterfacesSpec(vmi.Spec.Domain.Devices.Interfaces, func(iface v1.Interface) bool {
+		if iface.SRIOV == nil {
+			return false
+		}
+		network := findNetworkByName(vmi, iface.Name)
+		return network != nil && network.ResourceClaim != nil
+	})
+
+	if len(draInterfaces) == 0 {
+		return []api.HostDevice{}, nil
+	}
+
+	var hostDevices []api.HostDevice
+
+	for _, iface := range draInterfaces {
+		network := findNetworkByName(vmi, iface.Name)
+		if network == nil || network.ResourceClaim == nil {
+			continue
+		}
+
+		// Find device status entry by network name
+		deviceStatus := findDeviceStatusByName(vmi, network.Name)
+		if deviceStatus == nil {
+			return nil, fmt.Errorf("device status not found for DRA network %s", network.Name)
+		}
+
+		if deviceStatus.DeviceResourceClaimStatus == nil {
+			return nil, fmt.Errorf("device resource claim status not populated for network %s", network.Name)
+		}
+
+		if deviceStatus.DeviceResourceClaimStatus.Attributes == nil {
+			return nil, fmt.Errorf("device attributes not populated for network %s", network.Name)
+		}
+
+		// Extract PCI address
+		pciAddress := deviceStatus.DeviceResourceClaimStatus.Attributes.PCIAddress
+		if pciAddress == nil || *pciAddress == "" {
+			return nil, fmt.Errorf("PCI address not found for DRA network %s", network.Name)
+		}
+
+		// Parse PCI address (format: 0000:05:00.1)
+		address, err := parsePCIAddress(*pciAddress)
+		if err != nil {
+			return nil, fmt.Errorf("invalid PCI address %s for network %s: %w", *pciAddress, network.Name, err)
+		}
+
+		// Create hostdev with DRA-specific alias
+		hostDev := api.HostDevice{
+			Alias: api.NewUserDefinedAlias(deviceinfo.DraSRIOVAliasPrefix + iface.Name),
+			Source: api.HostDeviceSource{
+				Address: &api.Address{
+					Type:     "pci",
+					Domain:   address.Domain,
+					Bus:      address.Bus,
+					Slot:     address.Slot,
+					Function: address.Function,
+				},
+			},
+			Type:    "pci",
+			Managed: "no",
+		}
+
+		// Apply additional decorations (boot order, guest PCI address)
+		decorateHook := newDecorateHook(iface)
+		if err := decorateHook(&hostDev); err != nil {
+			return nil, fmt.Errorf("failed to decorate DRA SR-IOV host device for %s: %w", iface.Name, err)
+		}
+
+		hostDevices = append(hostDevices, hostDev)
+	}
+
+	return hostDevices, nil
+}
+
+// Helper functions
+
+// findNetworkByName returns a pointer to the VMI network called name, or nil if
+// the VMI spec has no such network.
+func findNetworkByName(vmi *v1.VirtualMachineInstance, name string) *v1.Network {
+	for i := range vmi.Spec.Networks {
+		if vmi.Spec.Networks[i].Name == name {
+			return &vmi.Spec.Networks[i]
+		}
+	}
+	return nil
+}
+
+// findDeviceStatusByName returns the first entry of the VMI host device statuses
+// called name, or nil if there is none.
+// NOTE(review): the DRA status controller appends network device statuses after
+// the host device ones in this list, so a host device sharing the name would be
+// matched first — confirm that names cannot collide.
+func findDeviceStatusByName(vmi *v1.VirtualMachineInstance, name string) *v1.DeviceStatusInfo {
+	if vmi.Status.DeviceStatus == nil {
+		return nil
+	}
+
+	for i := range vmi.Status.DeviceStatus.HostDeviceStatuses {
+		if vmi.Status.DeviceStatus.HostDeviceStatuses[i].Name == name {
+			return &vmi.Status.DeviceStatus.HostDeviceStatuses[i]
+		}
+	}
+	return nil
+}
+
+// PCIAddress holds the components of a PCI address, each as a "0x"-prefixed
+// hexadecimal string.
+type PCIAddress struct {
+	Domain   string
+	Bus      string
+	Slot     string
+	Function string
+}
+
+// parsePCIAddress parses a PCI address of the form domain:bus:slot.function
+// (for example 0000:05:00.1) and returns its components prefixed with "0x".
+// It fails when a separator is missing or when a component is empty or is not
+// made of hexadecimal digits only.
+func parsePCIAddress(addr string) (*PCIAddress, error) {
+	domain, rest, hasDomain := cutPCIField(addr, ':')
+	bus, rest, hasBus := cutPCIField(rest, ':')
+	slot, function, hasSlot := cutPCIField(rest, '.')
+	if !hasDomain || !hasBus || !hasSlot {
+		return nil, fmt.Errorf("invalid PCI address format: %s (expected format: 0000:05:00.1)", addr)
+	}
+
+	for _, field := range []string{domain, bus, slot, function} {
+		if !isHexPCIField(field) {
+			return nil, fmt.Errorf("invalid PCI address format: %s (component %q is not hexadecimal)", addr, field)
+		}
+	}
+
+	return &PCIAddress{
+		Domain:   "0x" + domain,
+		Bus:      "0x" + bus,
+		Slot:     "0x" + slot,
+		Function: "0x" + function,
+	}, nil
+}
+
+// cutPCIField splits s around the first occurrence of sep. If sep does not
+// occur in s, it returns s, "" and false.
+func cutPCIField(s string, sep byte) (before, after string, found bool) {
+	for i := 0; i < len(s); i++ {
+		if s[i] == sep {
+			return s[:i], s[i+1:], true
+		}
+	}
+	return s, "", false
+}
+
+// isHexPCIField reports whether s is a non-empty run of hexadecimal digits.
+func isHexPCIField(s string) bool {
+	if s == "" {
+		return false
+	}
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		isHex := ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+		if !isHex {
+			return false
+		}
+	}
+	return true
+}
diff --git a/staging/src/kubevirt.io/api/core/v1/schema.go b/staging/src/kubevirt.io/api/core/v1/schema.go
index 49df3125d8cc..11413f699ae6 100644
--- a/staging/src/kubevirt.io/api/core/v1/schema.go
+++ b/staging/src/kubevirt.io/api/core/v1/schema.go
@@ -1655,8 +1655,9 @@ type Network struct {
 // Represents the source resource that will be connected to the vm.
 // Only one of its members may be specified.
 type NetworkSource struct {
-	Pod    *PodNetwork    `json:"pod,omitempty"`
-	Multus *MultusNetwork `json:"multus,omitempty"`
+	Pod           *PodNetwork    `json:"pod,omitempty"`
+	Multus        *MultusNetwork `json:"multus,omitempty"`
+	ResourceClaim *ClaimRequest  `json:"resourceClaim,omitempty"`
 }
 
 // Represents the stock pod network interface.