diff --git a/docs/run_test/platform.rst b/docs/run_test/platform.rst index 81809c24f2..89ac85060c 100644 --- a/docs/run_test/platform.rst +++ b/docs/run_test/platform.rst @@ -235,14 +235,35 @@ deployment. created and the resulting virtual network name will be ``. If `virtual_network_resource_group` is provided, an existing virtual network, with the name equal to `virtual_network_name`, - will be used. + will be used. `virtual_network_resource_group` does not change the behavior for + test resource group creation. Instead, the subnets in the test RG's vnet will be + peered to a subnet within the RG `virtual_network_resource_group`. Peering requires + that the subnets and vnet have no address space collisions; a default scheme + assuming a remote subnet named 'default' with the address space 10.255.255.0/24 is used + to allow a large number of test resource groups to be created without collisions. + Virtual networks in the test environments will use the default subnet prefix + 10.$(environment_id/256).$(environment_id%256).0/24; any additional subnets will use the prefix + 192.168.$(nic_index-1).0/24. + This scheme allows for 0xFFFE test environments with up to 256 nics per VM; however, + Azure will likely restrict these numbers to something smaller than the maximum. + LISA does not remove the subnet peerings from the remote vnet in `virtual_network_resource_group`. + The total number of test environments will be limited based on the allowed active subnet peerings per vnet. + This complex behavior is intended to enable testing without exposing a public IP address, + since a VM in `virtual_network_resource_group` will be able to access the test environments + via SSH on the private network. This assumes an automated deployment of this orchestrator resource group, + VM and virtual network with the expected default values. '`` and `` are respected. 
+ Note that usage of `subnet_prefix` with `virtual_network_resource_group` will likely result in address space + collisions and failed deployments. Similarly, use of `` with this option will likely + result in failed test resource deployments. * **subnet_prefix**. Specify the desired subnet prefix. If `virtual_network_resource_group` is not provided, a virtual network and subnet will be created and the resulting subnets will look like - `0`, `1`, and so on. If - `virtual_network_resource_group` is provided, an existing virtual network and - subnet, with the name equal to `subnet_prefix`, will be used. -* **use_public_address**. True means to connect to the Azure VMs with their + `0`, `1`, and so on. + The `subnet_prefix` option will likely conflict with the use of `virtual_network_resource_group`. + LISA will warn of this configuration but allow its use; see notes for `virtual_network_resource_group` + for more details. + + * **use_public_address**. True means to connect to the Azure VMs with their public IP addresses. False means to connect with the private IP addresses. If not provided, the connections will default to using the public IP addresses. 
diff --git a/lisa/microsoft/runbook/azure.yml b/lisa/microsoft/runbook/azure.yml index 257f8ee3c3..17f3a80475 100644 --- a/lisa/microsoft/runbook/azure.yml +++ b/lisa/microsoft/runbook/azure.yml @@ -45,14 +45,14 @@ variable: value: "" - name: virtual_network_name value: "" - - name: subnet_prefix - value: "" - name: use_public_address value: true - name: create_public_address value: true - name: resource_group_tags value: null + - name: subnet_prefix + value: "" # Example usage: # resource_group_tags: # Environment: Testing diff --git a/lisa/microsoft/testsuites/dpdk/dpdkutil.py b/lisa/microsoft/testsuites/dpdk/dpdkutil.py index 0274be7288..69a444fe50 100644 --- a/lisa/microsoft/testsuites/dpdk/dpdkutil.py +++ b/lisa/microsoft/testsuites/dpdk/dpdkutil.py @@ -1,3 +1,4 @@ +import ipaddress import itertools import re import time @@ -259,23 +260,23 @@ def generate_testpmd_multiple_port_command( use_service_cores: int = 1, set_mtu: int = 0, ) -> Dict[DpdkTestResources, str]: - # for N senders, make a list of subnets from - # 10.0.1.0/24 to 10.0.N.0/24. - # these can be arbitrarily picked, each VM has nics on each - # subnets, so it doesn't matter which is picked for each VM - # as long as the senders are on distinct subnets. - subnets = [] - for i in range(len(senders)): - subnets += [f"10.0.{i + 1}.0/24"] + # make a list of nics on each non-primary subnet on the receiver + # these can be arbitrarily picked, each VM must have nics on each + # subnet for the test to run, so as long as we exclude the ssh + # interface, it doesn't matter which is picked as long as the mapping + # is consistent between senders and receiver. 
+ subnets = [ + subnet for subnet in receiver.node.nics.get_node_subnets(include_primary=False) + ] sender_nics: Dict[DpdkTestResources, NicInfo] = dict() receiver_nics: Dict[str, NicInfo] = dict() for i in range(len(senders)): # pick one nic per subnet for the senders subnet = subnets[i] # defined above as "10.0.{i + 1}.0/24" sender = senders[i] - sender_nics[sender] = sender.node.nics.get_nic_by_subnet(subnet) + sender_nics[sender] = sender.node.nics.get_nic_by_subnet(str(subnet)) # and the corresponding nic on the receiver for that subnet. - receiver_nics[subnet] = receiver.node.nics.get_nic_by_subnet(subnet) + receiver_nics[str(subnet)] = receiver.node.nics.get_nic_by_subnet(str(subnet)) # for MTU test: check that we can fetch the max MTU size for the NIC if set_mtu: @@ -298,13 +299,13 @@ def generate_testpmd_multiple_port_command( for i in range(len(senders)): # get the sender sender = senders[i] - sender_subnet = subnets[i] # defined above as "10.0.{i + 1}.0/24" + sender_subnet = subnets[i] # will be something like 10.X.Y.0/24 # get the sender nic we picked sender_nic = sender_nics[sender] # get the subnet for that nic (follows the pattern from before) # get the corresponding receiver nic for that subnet - receiver_nic = receiver_nics[sender_subnet] + receiver_nic = receiver_nics[str(sender_subnet)] # generate the command for the sender snd_cmd = sender.testpmd.generate_testpmd_command( [sender_nic], @@ -320,7 +321,7 @@ def generate_testpmd_multiple_port_command( kit_cmd_pairs[sender] = snd_cmd # receiver needs multiple ports, so only generate the include. receiver_include = receiver.testpmd.generate_testpmd_include( - receiver_nics[sender_subnet], i + receiver_nics[str(sender_subnet)], i ) # and save it receiver_includes += [receiver_include] @@ -484,8 +485,13 @@ def initialize_node_resources( assert_that(len(node.nics)).described_as( "Test needs at least 1 NIC on the test node." 
).is_greater_than_or_equal_to(1) - - test_nic = node.nics.get_nic_by_subnet("10.0.1.0/24") + # get a sorted list of subnets for the nics on the node, + # excluding the primary nic subnet, + # and pick the one on the lowest subnet as the testing default. + subnets = sorted( + [str(subnet) for subnet in node.nics.get_node_subnets(include_primary=False)] + ) + test_nic = node.nics.get_nic_by_subnet(str(subnets[0])) # check an assumption that our nics are bound to hv_netvsc # at test start. @@ -869,13 +875,17 @@ def verify_dpdk_mutliple_ports( (f"receiver:{external_ips[0]}\nsenders:{external_ips[1]},{external_ips[2]}\n") ) receiver, sender_a, sender_b = environment.nodes.list() + subnets = receiver.nics.get_node_subnets(include_primary=False) + subnet_a, subnet_b = subnets[:2] # will be something like 10.X.Y.0/24 + # note: will assert if there are no nics on corresponding subnets, + # this is good and proper since we can't run without that setup. nic_pairings = { receiver: [ - receiver.nics.get_nic_by_subnet("10.0.1.0/24"), - receiver.nics.get_nic_by_subnet("10.0.2.0/24"), + receiver.nics.get_nic_by_subnet(str(subnet_a)), + receiver.nics.get_nic_by_subnet(str(subnet_b)), ], - sender_a: [sender_a.nics.get_nic_by_subnet("10.0.1.0/24")], - sender_b: [sender_b.nics.get_nic_by_subnet("10.0.2.0/24")], + sender_a: [sender_a.nics.get_nic_by_subnet(str(subnet_a))], + sender_b: [sender_b.nics.get_nic_by_subnet(str(subnet_b))], } # get test duration variable if set # enables long-running tests to shakeQoS and SLB issue @@ -986,10 +996,12 @@ def ipv4_to_lpm(addr: str) -> str: # enable ip forwarding for secondary and tertiary nics in this test. # run in parallel to save a bit of time on this net io step. 
def __enable_ip_forwarding(node: Node) -> None: - fwd_subnets = [ - node.nics.get_nic_by_index(nic_index).ip_addr for nic_index in [1, 2] - ] - for subnet_ip in fwd_subnets: + fwd_subnets = node.nics.get_node_subnets(include_primary=False) + subnet_nics = [node.nics.get_nic_by_subnet(str(subnet)) for subnet in fwd_subnets] + subnet_ips = [nic.ip_addr for nic in subnet_nics] + + for subnet_ip in subnet_ips: + node.log.debug(f"Enabling IP forwarding for nic on subnet {subnet_ip}") node.features[NetworkInterface].switch_ip_forwarding( enable=True, private_ip_addr=subnet_ip ) @@ -1115,22 +1127,8 @@ def verify_dpdk_l3fwd_ntttcp_tcp( # 3. enjoy the thrill of victory, ship a cloud net applicance. l3fwd_app_name = "l3fwd" - # pick fwd/send/receive nodes based on well known addresses in our subnets - forwarder = [ - node - for node in environment.nodes.list() - if node.nics.get_primary_nic().ip_addr.endswith("4") - ][0] - sender = [ - node - for node in environment.nodes.list() - if node.nics.get_primary_nic().ip_addr.endswith("5") - ][0] - receiver = [ - node - for node in environment.nodes.list() - if node.nics.get_primary_nic().ip_addr.endswith("6") - ][0] + + forwarder, sender, receiver = environment.nodes.list() if not ( forwarder.tools[Lscpu].get_architecture() == CpuArchitecture.X64 @@ -1172,15 +1170,22 @@ def verify_dpdk_l3fwd_ntttcp_tcp( forwarder.log.debug(f"fwd: {str(forwarder.nics)}") receiver.log.debug(f"rcv: {str(receiver.nics)}") sender.log.debug(f"snd: {str(sender.nics)}") + subnets = forwarder.nics.get_node_subnets(include_primary=False) + if len(subnets) != 2: + raise SkippedException( + "Expected exactly 2 non-primary subnets for this test. 
" + f"Found subnets: {subnets}" + ) + subnet_a, subnet_b = subnets subnet_a_nics = { - forwarder: forwarder.nics.get_nic_by_subnet("10.0.1.0/24"), - sender: sender.nics.get_nic_by_subnet("10.0.1.0/24"), - receiver: receiver.nics.get_nic_by_subnet("10.0.1.0/24"), + forwarder: forwarder.nics.get_nic_by_subnet(str(subnet_a)), + sender: sender.nics.get_nic_by_subnet(str(subnet_a)), + receiver: receiver.nics.get_nic_by_subnet(str(subnet_a)), } subnet_b_nics = { - forwarder: forwarder.nics.get_nic_by_subnet("10.0.2.0/24"), - receiver: receiver.nics.get_nic_by_subnet("10.0.2.0/24"), - sender: sender.nics.get_nic_by_subnet("10.0.2.0/24"), + forwarder: forwarder.nics.get_nic_by_subnet(str(subnet_b)), + receiver: receiver.nics.get_nic_by_subnet(str(subnet_b)), + sender: sender.nics.get_nic_by_subnet(str(subnet_b)), } # We use ntttcp for snd/rcv which will respect the kernel route table. diff --git a/lisa/nic.py b/lisa/nic.py index 7e91a99396..b24fcbabea 100644 --- a/lisa/nic.py +++ b/lisa/nic.py @@ -8,7 +8,7 @@ from collections import OrderedDict from dataclasses import dataclass from pathlib import PurePosixPath -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from assertpy import assert_that from retry import retry @@ -44,6 +44,10 @@ def __init__( self.pci_slot = pci_slot self.dev_uuid = "" self.module_name = "" + self.subnet: Optional[ + Union[ipaddress.IPv4Network, ipaddress.IPv6Network] + ] = None + if driver_sysfs_path is None: self.driver_sysfs_path = PurePosixPath("") else: @@ -60,6 +64,15 @@ def __str__(self) -> str: f"dev_uuid: {self.dev_uuid}\n" ) + def get_subnet(self) -> Union[ipaddress.IPv4Network, ipaddress.IPv6Network]: + # get the subnet for this nic, assuming a mask of /24 + # note: if the bicep template changes to assign different masks for subnet_prefix, + # this function will need to be updated to + if self.subnet: + return self.subnet + else: + raise LisaException(f"No 
subnet information available for {self.name} ") + @property def is_pci_module_enabled(self) -> bool: """ @@ -304,6 +317,19 @@ def get_nic_by_subnet(self, subnet: str) -> NicInfo: return nic raise LisaException(f"Could not find a nic for requested subnet: {subnet}") + # get a list of all subnets associated with the nics on this node, + # with the option to include the primary nic subnet or not. + def get_node_subnets( + self, include_primary: bool = True + ) -> List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]]: + return list( + [ + nic.subnet + for nic in self.nics.values() + if nic.subnet and (include_primary or nic.name != self.default_nic) + ] + ) + def unbind(self, nic: NicInfo) -> None: # unbind nic from current driver and return the old sysfs path tee = self._node.tools[Tee] @@ -341,6 +367,7 @@ def load_nics_info(self, nic_name: Optional[str] = None) -> None: nic_entry = self.nics[nic_name] nic_entry.ip_addr = ip_addr nic_entry.mac_addr = mac + nic_entry.subnet = nic_info.subnet found_nics.append(nic_name) if not nic_name: diff --git a/lisa/sut_orchestrator/azure/arm_template.bicep b/lisa/sut_orchestrator/azure/arm_template.bicep index 34ce7bd676..127cbc1ea2 100644 --- a/lisa/sut_orchestrator/azure/arm_template.bicep +++ b/lisa/sut_orchestrator/azure/arm_template.bicep @@ -22,6 +22,12 @@ param shared_resource_group_name string @description('created subnet count') param subnet_count int +@description('user supplied subnet prefix (incompatible with virtual_network_resource_group)') +param subnet_prefix string + +@description('index of the test resource group for shared vnet subnet mapping') +param resource_group_index int + @description('options for availability sets, zones, and VMSS') param availability_options object @@ -31,9 +37,6 @@ param virtual_network_resource_group string @description('the name of vnet') param virtual_network_name string -@description('the prefix of the subnets') -param subnet_prefix string - @description('tags of virtual 
machine') param vm_tags object @@ -64,10 +67,13 @@ param source_address_prefixes array @description('Generate public IP address for each node') param create_public_address bool -var vnet_id = virtual_network_name_resource.id + var node_count = length(nodes) var availability_set_name_value = 'lisa-availabilitySet' -var existing_subnet_ref = (empty(virtual_network_resource_group) ? '' : resourceId(virtual_network_resource_group, 'Microsoft.Network/virtualNetworks/subnets', virtual_network_name, subnet_prefix)) +var rg_index_mod_256 = resource_group_index % 256 +var rg_index_div_256 = resource_group_index / 256 +var use_existing_vnet = !empty(virtual_network_resource_group) +//var shared_subnet_names = [for nic_index in range(0, subnet_count): nic_index==0 ? 'default' : 'test-subnet-${nic_index}'] var availability_set_tags = availability_options.availability_set_tags var availability_set_properties = availability_options.availability_set_properties var availability_zones = availability_options.availability_zones @@ -224,15 +230,15 @@ func getAvailabilitySetId(availability_set_name string) object => { id: resourceId('Microsoft.Compute/availabilitySets', availability_set_name) } + module nodes_nics './nested_nodes_nics.bicep' = [for i in range(0, node_count): { name: '${nodes[i].name}-nics' params: { vmName: nodes[i].name nic_count: nodes[i].nic_count location: location - vnet_id: vnet_id - subnet_prefix: subnet_prefix - existing_subnet_ref: existing_subnet_ref + vnet_id: virtual_network.id + resource_group_index: resource_group_index enable_sriov: nodes[i].enable_sriov tags: tags use_ipv6: use_ipv6 @@ -244,42 +250,78 @@ module nodes_nics './nested_nodes_nics.bicep' = [for i in range(0, node_count): ] }] -resource virtual_network_name_resource 'Microsoft.Network/virtualNetworks@2024-05-01' = if (empty(virtual_network_resource_group)) { +resource orchestrator_vnet 'Microsoft.Network/virtualNetworks@2024-01-01' existing = if (use_existing_vnet) { + scope: 
resourceGroup(virtual_network_resource_group) name: virtual_network_name +} + + +// If there is already a vnet, LISA only needs to create the test nic subnets. 10.0.0.0/24 must already exist. +// This deployment should generate an exception at runtime if two environments have overlapping address spaces; +// this is expected and will happen if the environment id mod 256 rolls over while an old environment is still active. This should be rare, but will work out so long as: +// LISA must catch this exception and retry the deployment after a timeout period to allow the old environment to be cleaned up. +// LISA must remove old subnets when an environment is not needed anymore. +// This will ensure no collisions occur where one test in a subnet can disturb another in the same subnet. + +module remotePeering 'remote-peering.bicep' = if (use_existing_vnet) { + name: 'remote-peering-deployment' + scope: resourceGroup(virtual_network_resource_group) + dependsOn: [ + peering + ] + params: { + remoteVnetName: virtual_network_name + localVnetId: virtual_network.id + resource_group_index: resource_group_index + } +} + +resource peering 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2023-11-01' = if (use_existing_vnet) { + name: 'vnet-peering-e${resource_group_index}' + parent: virtual_network + properties: { + allowVirtualNetworkAccess: true + localSubnetNames: [ 'default' ] + peerCompleteVnets: false + remoteSubnetNames:['default'] + remoteVirtualNetwork: { + id: orchestrator_vnet.id + } + //remoteVirtualNetwork: orchestrator_vnet // reference to the orchestrator vnet + } + } + + +resource virtual_network 'Microsoft.Network/virtualNetworks@2024-05-01' = { + name: 'test-vnet-${resource_group_index}' tags: tags location: location properties: { addressSpace: { - addressPrefixes: concat( - ['10.0.0.0/16'], + addressPrefixes: empty(subnet_prefix) ? (concat( + ['10.${rg_index_div_256}.${rg_index_mod_256}.0/24', '192.168.0.0/16' ], use_ipv6 ? 
['2001:db8::/32'] : [] - ) - } - subnets: [for j in range(0, subnet_count): { - name: '${subnet_prefix}${j}' - properties: { - addressPrefixes: concat( - ['10.0.${j}.0/24'], - use_ipv6 ? ['2001:db8:${j}::/64'] : [] - ) + )) : [ subnet_prefix ] + } + subnets: [ for i in range(0,subnet_count): { + name: empty(subnet_prefix) ? (i==0 ? 'default' : 'test-subnet-${i}') : subnet_prefix + properties: { + addressPrefix: empty(subnet_prefix) ? (i==0 ? '10.${rg_index_div_256}.${rg_index_mod_256}.0/24' : '192.168.${i-1}.0/24' ) : subnet_prefix defaultOutboundAccess: enable_vm_nat - networkSecurityGroup: { - id: resourceId('Microsoft.Network/networkSecurityGroups', '${toLower(virtual_network_name)}-nsg') + networkSecurityGroup:{ + id: nsg.id + } } - } }] } - dependsOn: [ - nsg - ] } resource nsg 'Microsoft.Network/networkSecurityGroups@2024-05-01' = { - name: '${toLower(virtual_network_name)}-nsg' + name: 'lisa-test-nsg-${resource_group_index}' location: location properties: { securityRules: [ - { + { name: 'LISASSH' properties: { priority: 100 @@ -491,7 +533,7 @@ resource nodes_vms 'Microsoft.Compute/virtualMachines@2024-03-01' = [for i in ra availability_set nodes_image nodes_nics - virtual_network_name_resource + virtual_network nodes_disk nodes_data_disks_with_vhds ] diff --git a/lisa/sut_orchestrator/azure/autogen_arm_template.json b/lisa/sut_orchestrator/azure/autogen_arm_template.json index 52e1e79bb9..ad90122aa7 100644 --- a/lisa/sut_orchestrator/azure/autogen_arm_template.json +++ b/lisa/sut_orchestrator/azure/autogen_arm_template.json @@ -5,8 +5,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.36.177.2456", - "templateHash": "18099469189437246926" + "version": "0.41.2.15936", + "templateHash": "8018757189124818305" } }, "functions": [ @@ -463,6 +463,18 @@ "description": "created subnet count" } }, + "subnet_prefix": { + "type": "string", + "metadata": { + "description": "user supplied subnet prefix (incompatible with virtual_network_resource_group)" 
+ } + }, + "resource_group_index": { + "type": "int", + "metadata": { + "description": "index of the test resource group for shared vnet subnet mapping" + } + }, "availability_options": { "type": "object", "metadata": { @@ -481,12 +493,6 @@ "description": "the name of vnet" } }, - "subnet_prefix": { - "type": "string", - "metadata": { - "description": "the prefix of the subnets" - } - }, "vm_tags": { "type": "object", "metadata": { @@ -562,10 +568,11 @@ } } ], - "vnet_id": "[resourceId('Microsoft.Network/virtualNetworks', parameters('virtual_network_name'))]", "node_count": "[length(parameters('nodes'))]", "availability_set_name_value": "lisa-availabilitySet", - "existing_subnet_ref": "[if(empty(parameters('virtual_network_resource_group')), '', resourceId(parameters('virtual_network_resource_group'), 'Microsoft.Network/virtualNetworks/subnets', parameters('virtual_network_name'), parameters('subnet_prefix')))]", + "rg_index_mod_256": "[mod(parameters('resource_group_index'), 256)]", + "rg_index_div_256": "[div(parameters('resource_group_index'), 256)]", + "use_existing_vnet": "[not(empty(parameters('virtual_network_resource_group')))]", "availability_set_tags": "[parameters('availability_options').availability_set_tags]", "availability_set_properties": "[parameters('availability_options').availability_set_properties]", "availability_zones": "[parameters('availability_options').availability_zones]", @@ -577,11 +584,40 @@ "combined_aset_tags": "[union(parameters('tags'), variables('availability_set_tags'))]" }, "resources": { - "virtual_network_name_resource": { - "condition": "[empty(parameters('virtual_network_resource_group'))]", + "orchestrator_vnet": { + "condition": "[variables('use_existing_vnet')]", + "existing": true, + "type": "Microsoft.Network/virtualNetworks", + "apiVersion": "2024-01-01", + "resourceGroup": "[parameters('virtual_network_resource_group')]", + "name": "[parameters('virtual_network_name')]" + }, + "peering": { + "condition": 
"[variables('use_existing_vnet')]", + "type": "Microsoft.Network/virtualNetworks/virtualNetworkPeerings", + "apiVersion": "2023-11-01", + "name": "[format('{0}/{1}', format('test-vnet-{0}', parameters('resource_group_index')), format('vnet-peering-e{0}', parameters('resource_group_index')))]", + "properties": { + "allowVirtualNetworkAccess": true, + "localSubnetNames": [ + "default" + ], + "peerCompleteVnets": false, + "remoteSubnetNames": [ + "default" + ], + "remoteVirtualNetwork": { + "id": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('virtual_network_resource_group')), 'Microsoft.Network/virtualNetworks', parameters('virtual_network_name'))]" + } + }, + "dependsOn": [ + "virtual_network" + ] + }, + "virtual_network": { "type": "Microsoft.Network/virtualNetworks", "apiVersion": "2024-05-01", - "name": "[parameters('virtual_network_name')]", + "name": "[format('test-vnet-{0}', parameters('resource_group_index'))]", "tags": "[parameters('tags')]", "location": "[parameters('location')]", "properties": { @@ -590,19 +626,19 @@ "name": "subnets", "count": "[length(range(0, parameters('subnet_count')))]", "input": { - "name": "[format('{0}{1}', parameters('subnet_prefix'), range(0, parameters('subnet_count'))[copyIndex('subnets')])]", + "name": "[if(empty(parameters('subnet_prefix')), if(equals(range(0, parameters('subnet_count'))[copyIndex('subnets')], 0), 'default', format('test-subnet-{0}', range(0, parameters('subnet_count'))[copyIndex('subnets')])), parameters('subnet_prefix'))]", "properties": { - "addressPrefixes": "[concat(createArray(format('10.0.{0}.0/24', range(0, parameters('subnet_count'))[copyIndex('subnets')])), if(parameters('use_ipv6'), createArray(format('2001:db8:{0}::/64', range(0, parameters('subnet_count'))[copyIndex('subnets')])), createArray()))]", + "addressPrefix": "[if(empty(parameters('subnet_prefix')), if(equals(range(0, parameters('subnet_count'))[copyIndex('subnets')], 0), 
format('10.{0}.{1}.0/24', variables('rg_index_div_256'), variables('rg_index_mod_256')), format('192.168.{0}.0/24', sub(range(0, parameters('subnet_count'))[copyIndex('subnets')], 1))), parameters('subnet_prefix'))]", "defaultOutboundAccess": "[parameters('enable_vm_nat')]", "networkSecurityGroup": { - "id": "[resourceId('Microsoft.Network/networkSecurityGroups', format('{0}-nsg', toLower(parameters('virtual_network_name'))))]" + "id": "[resourceId('Microsoft.Network/networkSecurityGroups', format('lisa-test-nsg-{0}', parameters('resource_group_index')))]" } } } } ], "addressSpace": { - "addressPrefixes": "[concat(createArray('10.0.0.0/16'), if(parameters('use_ipv6'), createArray('2001:db8::/32'), createArray()))]" + "addressPrefixes": "[if(empty(parameters('subnet_prefix')), concat(createArray(format('10.{0}.{1}.0/24', variables('rg_index_div_256'), variables('rg_index_mod_256')), '192.168.0.0/16'), if(parameters('use_ipv6'), createArray('2001:db8::/32'), createArray())), createArray(parameters('subnet_prefix')))]" } }, "dependsOn": [ @@ -612,7 +648,7 @@ "nsg": { "type": "Microsoft.Network/networkSecurityGroups", "apiVersion": "2024-05-01", - "name": "[format('{0}-nsg', toLower(parameters('virtual_network_name')))]", + "name": "[format('lisa-test-nsg-{0}', parameters('resource_group_index'))]", "location": "[parameters('location')]", "properties": { "securityRules": [ @@ -874,7 +910,7 @@ "nodes_disk", "nodes_image", "nodes_nics", - "virtual_network_name_resource" + "virtual_network" ] }, "nodes_nics": { @@ -883,7 +919,7 @@ "count": "[length(range(0, variables('node_count')))]" }, "type": "Microsoft.Resources/deployments", - "apiVersion": "2022-09-01", + "apiVersion": "2025-04-01", "name": "[format('{0}-nics', parameters('nodes')[range(0, variables('node_count'))[copyIndex()]].name)]", "properties": { "expressionEvaluationOptions": { @@ -901,13 +937,10 @@ "value": "[parameters('location')]" }, "vnet_id": { - "value": "[variables('vnet_id')]" - }, - "subnet_prefix": 
{ - "value": "[parameters('subnet_prefix')]" + "value": "[resourceId('Microsoft.Network/virtualNetworks', format('test-vnet-{0}', parameters('resource_group_index')))]" }, - "existing_subnet_ref": { - "value": "[variables('existing_subnet_ref')]" + "resource_group_index": { + "value": "[parameters('resource_group_index')]" }, "enable_sriov": { "value": "[parameters('nodes')[range(0, variables('node_count'))[copyIndex()]].enable_sriov]" @@ -929,8 +962,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.36.177.2456", - "templateHash": "15654609969338604205" + "version": "0.41.2.15936", + "templateHash": "7649300423127023996" } }, "functions": [ @@ -954,6 +987,22 @@ "id": "[resourceId('Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]" } } + }, + "getSubnetName": { + "parameters": [ + { + "type": "int", + "name": "resource_group_index" + }, + { + "type": "int", + "name": "nic_index" + } + ], + "output": { + "type": "string", + "value": "[if(equals(parameters('nic_index'), 0), 'default', format('test-subnet-{0}', parameters('nic_index')))]" + } } } } @@ -971,11 +1020,8 @@ "vnet_id": { "type": "string" }, - "subnet_prefix": { - "type": "string" - }, - "existing_subnet_ref": { - "type": "string" + "resource_group_index": { + "type": "int" }, "enable_sriov": { "type": "bool" @@ -1006,7 +1052,7 @@ "location": "[parameters('location')]", "tags": "[parameters('tags')]", "properties": { - "ipConfigurations": "[concat(createArray(createObject('name', 'IPv4Config', 'properties', createObject('privateIPAddressVersion', 'IPv4', 'publicIPAddress', if(and(equals(0, range(0, parameters('nic_count'))[copyIndex()]), parameters('create_public_address')), variables('publicIpAddress'), null()), 'subnet', createObject('id', if(not(empty(parameters('existing_subnet_ref'))), parameters('existing_subnet_ref'), format('{0}/subnets/{1}{2}', parameters('vnet_id'), parameters('subnet_prefix'), range(0, parameters('nic_count'))[copyIndex()]))), 
'privateIPAllocationMethod', 'Dynamic'))), if(parameters('use_ipv6'), createArray(createObject('name', 'IPv6Config', 'properties', createObject('privateIPAddressVersion', 'IPv6', 'publicIPAddress', if(and(equals(0, range(0, parameters('nic_count'))[copyIndex()]), parameters('create_public_address')), variables('publicIpAddressV6'), null()), 'subnet', createObject('id', if(not(empty(parameters('existing_subnet_ref'))), parameters('existing_subnet_ref'), format('{0}/subnets/{1}{2}', parameters('vnet_id'), parameters('subnet_prefix'), range(0, parameters('nic_count'))[copyIndex()]))), 'privateIPAllocationMethod', 'Dynamic'))), createArray()))]", + "ipConfigurations": "[concat(createArray(createObject('name', 'IPv4Config', 'properties', createObject('privateIPAddressVersion', 'IPv4', 'publicIPAddress', if(and(equals(0, range(0, parameters('nic_count'))[copyIndex()]), parameters('create_public_address')), variables('publicIpAddress'), null()), 'subnet', createObject('id', format('{0}/subnets/{1}', parameters('vnet_id'), __bicep.getSubnetName(parameters('resource_group_index'), range(0, parameters('nic_count'))[copyIndex()]))), 'privateIPAllocationMethod', 'Dynamic'))), if(parameters('use_ipv6'), createArray(createObject('name', 'IPv6Config', 'properties', createObject('privateIPAddressVersion', 'IPv6', 'publicIPAddress', if(and(equals(0, range(0, parameters('nic_count'))[copyIndex()]), parameters('create_public_address')), variables('publicIpAddressV6'), null()), 'subnet', createObject('id', format('{0}/subnets/{1}', parameters('vnet_id'), __bicep.getSubnetName(parameters('resource_group_index'), range(0, parameters('nic_count'))[copyIndex()]))), 'privateIPAllocationMethod', 'Dynamic'))), createArray()))]", "enableAcceleratedNetworking": "[parameters('enable_sriov')]" } } @@ -1016,7 +1062,86 @@ "dependsOn": [ "[format('nodes_public_ip[{0}]', range(0, variables('node_count'))[copyIndex()])]", "[format('nodes_public_ip_ipv6[{0}]', range(0, 
variables('node_count'))[copyIndex()])]", - "virtual_network_name_resource" + "virtual_network" + ] + }, + "remotePeering": { + "condition": "[variables('use_existing_vnet')]", + "type": "Microsoft.Resources/deployments", + "apiVersion": "2025-04-01", + "name": "remote-peering-deployment", + "resourceGroup": "[parameters('virtual_network_resource_group')]", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "remoteVnetName": { + "value": "[parameters('virtual_network_name')]" + }, + "localVnetId": { + "value": "[resourceId('Microsoft.Network/virtualNetworks', format('test-vnet-{0}', parameters('resource_group_index')))]" + }, + "resource_group_index": { + "value": "[parameters('resource_group_index')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.41.2.15936", + "templateHash": "4298084375997684263" + } + }, + "parameters": { + "remoteVnetName": { + "type": "string", + "metadata": { + "description": "Name of the existing remote virtual network to peer with" + } + }, + "localVnetId": { + "type": "string", + "metadata": { + "description": "ID of the local VNet" + } + }, + "resource_group_index": { + "type": "int", + "metadata": { + "description": "resource group index (for unique naming)" + } + } + }, + "resources": [ + { + "type": "Microsoft.Network/virtualNetworks/virtualNetworkPeerings", + "apiVersion": "2023-11-01", + "name": "[format('{0}/{1}', parameters('remoteVnetName'), format('vnet-peering-e{0}', parameters('resource_group_index')))]", + "properties": { + "allowVirtualNetworkAccess": true, + "localSubnetNames": [ + "default" + ], + "peerCompleteVnets": false, + "remoteSubnetNames": [ + "default" + ], + "remoteVirtualNetwork": { + "id": "[parameters('localVnetId')]" + } + } + } + ] + } + }, + "dependsOn": [ + 
"peering", + "virtual_network" ] } } diff --git a/lisa/sut_orchestrator/azure/common.py b/lisa/sut_orchestrator/azure/common.py index 2675b95e63..d6b922b615 100644 --- a/lisa/sut_orchestrator/azure/common.py +++ b/lisa/sut_orchestrator/azure/common.py @@ -112,7 +112,6 @@ AZURE_SHARED_RG_NAME = "lisa_shared_resource" AZURE_VIRTUAL_NETWORK_NAME = "lisa-virtualNetwork" -AZURE_SUBNET_PREFIX = "lisa-subnet-" NIC_NAME_PATTERN = re.compile(r"Microsoft.Network/networkInterfaces/(.*)", re.M) PATTERN_PUBLIC_IP_NAME = re.compile( @@ -1257,13 +1256,14 @@ class AzureArmParameter: virtual_network_resource_group: str = "" virtual_network_name: str = AZURE_VIRTUAL_NETWORK_NAME - subnet_prefix: str = AZURE_SUBNET_PREFIX is_ultradisk: bool = False is_data_disk_with_vhd: bool = False use_ipv6: bool = False enable_vm_nat: bool = False create_public_address: bool = True source_address_prefixes: List[str] = field(default_factory=list) + resource_group_index: Optional[int] = None + subnet_prefix: str = field(default="") def __post_init__(self, *args: Any, **kwargs: Any) -> None: add_secret(self.admin_username, PATTERN_HEADTAIL) @@ -1712,6 +1712,39 @@ def get_virtual_networks( return virtual_network_dict +def remove_vnet_peerings( + platform: "AzurePlatform", resource_group_name: str, environment_id: int +) -> None: + # delete both the vnet peering for a resource group and the corresponding link + # in the remote group. + # peerings for test resources in our vnet sharing scheme will only be + # peered with one remote VM vnet for orchestration; so we only need to find + # one corresponding peering on the remote side. 
+ network_client = get_network_client(platform) + vnets = network_client.virtual_networks.list( + resource_group_name=resource_group_name + ) + for vnet in vnets: + peerings = network_client.virtual_network_peerings.get( + resource_group_name, name=vnet.name + ) + for peering in peerings: + # remove the local peerings + network_client.virtual_network_peerings.begin_delete( + resource_group_name, vnet.name, peering.name + ).wait() + # remove the remote peering + network_client.virtual_network_peerings.begin_delete( + peering.remote_virtual_network.split("/")[4], + peering.remote_virtual_network.split("/")[-1], + f"vnet-peering-e{environment_id}", + ).wait() + # for subnet in vnet.subnets: + # network_client.subnets.begin_delete( + # resource_group_name, vnet.name, subnet.name + # ).wait() + + def get_network_client(platform: "AzurePlatform") -> NetworkManagementClient: return NetworkManagementClient( credential=platform.credential, diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index a077b90a10..e02032494d 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. 
import asyncio import copy +import ipaddress import json import re import string diff --git a/lisa/sut_orchestrator/azure/nested_nodes_nics.bicep b/lisa/sut_orchestrator/azure/nested_nodes_nics.bicep index 8f283ca78b..f0c41daa53 100644 --- a/lisa/sut_orchestrator/azure/nested_nodes_nics.bicep +++ b/lisa/sut_orchestrator/azure/nested_nodes_nics.bicep @@ -2,8 +2,7 @@ param vmName string param nic_count int param location string param vnet_id string -param subnet_prefix string -param existing_subnet_ref string +param resource_group_index int param enable_sriov bool param tags object param use_ipv6 bool @@ -16,6 +15,8 @@ func getPublicIpAddress(vmName string, publicIpName string) object => { var publicIpAddress = getPublicIpAddress(vmName, '${vmName}-public-ip') var publicIpAddressV6 = getPublicIpAddress(vmName, '${vmName}-public-ipv6') +func getSubnetName(resource_group_index int, nic_index int) string => nic_index == 0 ? 'default' : 'test-subnet-${nic_index}' + resource vm_nics 'Microsoft.Network/networkInterfaces@2023-06-01' = [for i in range(0, nic_count): { name: '${vmName}-nic-${i}' location: location @@ -29,7 +30,7 @@ resource vm_nics 'Microsoft.Network/networkInterfaces@2023-06-01' = [for i in ra privateIPAddressVersion: 'IPv4' publicIPAddress: ((0 == i && create_public_address) ? publicIpAddress : null) subnet: { - id: ((!empty(existing_subnet_ref)) ? existing_subnet_ref : '${vnet_id}/subnets/${subnet_prefix}${i}') + id: '${vnet_id}/subnets/${getSubnetName(resource_group_index, i)}' } privateIPAllocationMethod: 'Dynamic' } @@ -42,7 +43,7 @@ resource vm_nics 'Microsoft.Network/networkInterfaces@2023-06-01' = [for i in ra privateIPAddressVersion: 'IPv6' publicIPAddress: ((0 == i && create_public_address) ? publicIpAddressV6 : null) subnet: { - id: ((!empty(existing_subnet_ref)) ? 
existing_subnet_ref : '${vnet_id}/subnets/${subnet_prefix}${i}') + id: '${vnet_id}/subnets/${getSubnetName(resource_group_index, i)}' } privateIPAllocationMethod: 'Dynamic' } diff --git a/lisa/sut_orchestrator/azure/platform_.py b/lisa/sut_orchestrator/azure/platform_.py index ab01c8342c..bb6c554d14 100644 --- a/lisa/sut_orchestrator/azure/platform_.py +++ b/lisa/sut_orchestrator/azure/platform_.py @@ -68,6 +68,7 @@ from lisa.tools import Hostname, KernelConfig, Modinfo, Whoami from lisa.tools.lsinitrd import Lsinitrd from lisa.util import ( + DeploymentActiveException, KernelPanicException, LisaException, LisaTimeoutException, @@ -103,7 +104,6 @@ from . import features from .common import ( AZURE_SHARED_RG_NAME, - AZURE_SUBNET_PREFIX, AZURE_VIRTUAL_NETWORK_NAME, SAS_URL_PATTERN, AzureArmParameter, @@ -133,9 +133,11 @@ get_static_access_token, get_storage_account_name, get_vhd_details, + get_virtual_networks, get_vm, global_credential_access_lock, load_location_info_from_file, + remove_vnet_peerings, save_console_log, wait_operation, ) @@ -309,6 +311,7 @@ class AzurePlatformSchema: ) vm_tags: Optional[Dict[str, Any]] = field(default=None) tags: Optional[Dict[str, Any]] = field(default=None) + subnet_prefix: Optional[str] = field(default=None) use_public_address: bool = field(default=True) create_public_address: bool = field(default=True) use_ipv6: bool = field(default=False) @@ -322,7 +325,6 @@ class AzurePlatformSchema: virtual_network_resource_group: str = field(default="") virtual_network_name: str = field(default=AZURE_VIRTUAL_NETWORK_NAME) - subnet_prefix: str = field(default=AZURE_SUBNET_PREFIX) # Provisioning error causes by waagent is not ready or other reasons. In # smoke test, it can verify some points also. 
Other tests should use the @@ -701,6 +703,8 @@ def _delete_environment(self, environment: Environment, log: Logger) -> None: delete_operation = self._rm_client.resource_groups.begin_delete( resource_group_name ) + if self._azure_runbook.virtual_network_resource_group: + remove_vnet_peerings(self._platform, resource_group_name) except Exception as e: log.debug(f"exception on delete resource group: {e}") if delete_operation and self._azure_runbook.wait_delete: @@ -1216,12 +1220,20 @@ def _create_deployment_parameters( arm_parameters.virtual_network_resource_group = ( self._azure_runbook.virtual_network_resource_group ) - arm_parameters.subnet_prefix = ( - self._azure_runbook.subnet_prefix or AZURE_SUBNET_PREFIX - ) + arm_parameters.virtual_network_name = ( self._azure_runbook.virtual_network_name or AZURE_VIRTUAL_NETWORK_NAME ) + arm_parameters.subnet_prefix = self._azure_runbook.subnet_prefix or "" + if ( + arm_parameters.subnet_prefix + and arm_parameters.virtual_network_resource_group + ): + log.warn( + "subnet_prefix and virtual_network_resource_group runbook options " + "may introduce unexpected failures due to network peering " + "address prefix collisions." + ) arm_parameters.use_ipv6 = self._azure_runbook.use_ipv6 is_windows: bool = False @@ -1249,6 +1261,9 @@ def _create_deployment_parameters( arm_parameters.vm_tags["lisa_username"] = local().tools[Whoami].get_username() arm_parameters.vm_tags["lisa_hostname"] = local().tools[Hostname].get_hostname() + # pass the rg id to the arm template + arm_parameters.resource_group_index = int(environment.id) + nodes_parameters: List[AzureNodeArmParameter] = [] features_settings: Dict[str, schema.FeatureSettings] = {} @@ -1270,11 +1285,13 @@ def _create_deployment_parameters( azure_node_runbook = self._create_node_runbook( len(nodes_parameters), node_space, log, resource_group_name ) + # save parsed runbook back, for example, the version of marketplace may be # parsed from latest to a specified version. 
node.capability.set_extended_runbook(azure_node_runbook) node_arm_parameters = self._create_node_arm_parameters(node.capability, log) + nodes_parameters.append(node_arm_parameters) arm_parameters.is_ultradisk = any( @@ -1693,6 +1710,7 @@ def _validate_template( plugin_manager.hook.azure_deploy_failed(error_message=error_message) raise LisaException(error_message) + @retry(DeploymentActiveException, tries=5, delay=30, jitter=(0, 10)) # type: ignore def _deploy( self, location: str, @@ -1777,6 +1795,8 @@ def _deploy( f"provisioning failed for an internal error, try to run case. " f"Exception: {error_message}" ) + elif "DeploymentActive" in error_message: + raise DeploymentActiveException(e) else: try: self._save_console_log_and_check_panic( diff --git a/lisa/sut_orchestrator/azure/remote-peering.bicep b/lisa/sut_orchestrator/azure/remote-peering.bicep new file mode 100644 index 0000000000..61f72b0eba --- /dev/null +++ b/lisa/sut_orchestrator/azure/remote-peering.bicep @@ -0,0 +1,27 @@ +@description('Name of the existing remote virtual network to peer with') +param remoteVnetName string + +@description('ID of the local VNet') +param localVnetId string + +@description('resource group index (for unique naming)') +param resource_group_index int + +resource remoteVnet 'Microsoft.Network/virtualNetworks@2023-11-01' existing = { + name: remoteVnetName +} + +resource peering 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2023-11-01' = { + parent: remoteVnet + name: 'vnet-peering-e${resource_group_index}' + properties: { + allowVirtualNetworkAccess: true + localSubnetNames: [ 'default' ] + peerCompleteVnets: false + remoteSubnetNames:['default'] + remoteVirtualNetwork: { + id: localVnetId + } + //remoteVirtualNetwork: orchestrator_vnet // reference to the orchestrator vnet + } + } diff --git a/lisa/tools/ip.py b/lisa/tools/ip.py index 5b1748da35..7ab965000c 100644 --- a/lisa/tools/ip.py +++ b/lisa/tools/ip.py @@ -17,10 +17,19 @@ class IpInfo: - def __init__(self, 
nic_name: str, mac_addr: str, ip_addr: str): + # subnet mask assumes /24, *this is only a convention from the deployment templates for LISA.* + # if subnet configurations change in the future, this will also need to change. + def __init__( + self, nic_name: str, mac_addr: str, ip_addr: str, subnet_mask: str = "24" + ) -> None: self.nic_name = nic_name self.mac_addr = mac_addr self.ip_addr = ip_addr + self.subnet = ( + ipaddress.ip_network(f"{ip_addr}/{subnet_mask}", strict=False) + if all([ip_addr, subnet_mask]) + else None + ) class Ip(Tool): @@ -302,6 +311,7 @@ def get_info(self, nic_name: Optional[str] = None) -> List[IpInfo]: nic_name=matched["name"], mac_addr=matched["mac"], ip_addr=matched["ip_addr"], + subnet_mask=matched["subnet_mask"], ) ) return found_nics diff --git a/lisa/util/__init__.py b/lisa/util/__init__.py index 75b9584f82..b25d0ef478 100644 --- a/lisa/util/__init__.py +++ b/lisa/util/__init__.py @@ -161,6 +161,17 @@ def __init__(self, *args: object) -> None: super().__init__(*args) +class DeploymentActiveException(LisaException): + """ + This exception is used to indicate that there is already an active deployment on a resource. + It may be caused by the previous deployment not cleaned up yet, a deployment adding a resource to an existing resource, + or a deployment with the same name as another deployment. + This is a retryable exception: LISA can catch this exception retry the deployment after a timeout period. + """ + + ... + + class UnsupportedOperationException(LisaException): """ An operation might not be supported. Use this exception to