# Defaults shared by the NFS LoadBalancer security-group helpers below.
NFS_LB_SERVICE_NAME = "rook-ceph-nfs-my-nfs-load-balancer"
NFS_PORT = 2049


def _lb_endpoints(lb):
    """
    Collect the addresses a VPC load balancer entry advertises.

    Args:
        lb (dict): one entry of ``ibmcloud is lbs --output json``

    Returns:
        set: the LB hostname plus any public/private IP addresses found

    """
    endpoints = {lb.get("hostname")}
    # NOTE(review): the VPC API describes ``public_ips``/``private_ips`` as
    # lists of objects carrying an ``address`` key - confirm against the CLI
    # output of the targeted ibmcloud plugin version.
    for key in ("public_ips", "private_ips"):
        for entry in lb.get(key) or []:
            if isinstance(entry, dict):
                endpoints.add(entry.get("address"))
    endpoints.discard(None)
    return endpoints


def _is_inbound_tcp_rule_for_port(rule, port):
    """
    Tell whether a security group rule is exactly "inbound TCP <port>".

    Args:
        rule (dict): rule entry from ``ibmcloud is security-group`` output
        port (int): port that must be both ``port_min`` and ``port_max``

    Returns:
        bool: True if the rule matches, False otherwise

    """
    return (
        rule.get("direction") == "inbound"
        and rule.get("protocol") == "tcp"
        and rule.get("port_min") == port
        and rule.get("port_max") == port
    )


def _get_lb_security_groups(svc_name, namespace):
    """
    Look up the IBM Cloud VPC load balancer backing a Kubernetes
    LoadBalancer Service and return its security groups.

    The Service's first ingress entry (hostname, or IP when no hostname
    is set) is matched against the hostname and the public/private IPs of
    every load balancer in the cluster's resource group.

    Args:
        svc_name (str): Kubernetes Service name
        namespace (str): Kubernetes namespace

    Returns:
        list: security group dicts from the VPC LB; empty list when the
            Service has no ingress address, no LB matches it, or the LB
            has no security groups

    Raises:
        Exception: errors from the Service lookup, the ``ibmcloud`` CLI
            call or JSON decoding are not handled here and propagate to
            the caller

    """
    rg_name = get_resource_group_name(config.ENV_DATA["cluster_path"])

    svc_data = OCP(
        kind="Service",
        namespace=namespace,
        resource_name=svc_name,
    ).get()

    lb_ingress = svc_data.get("status", {}).get("loadBalancer", {}).get("ingress", [])
    if not lb_ingress:
        logger.warning(
            f"No LB ingress on service {svc_name}, cannot configure security group"
        )
        return []

    lb_hostname = lb_ingress[0].get("hostname") or lb_ingress[0].get("ip")
    if not lb_hostname:
        logger.warning(f"No hostname/IP in LB ingress for service {svc_name}")
        return []

    logger.debug(f"LB endpoint for {svc_name}: {lb_hostname}")

    out = run_ibmcloud_cmd(
        f"ibmcloud is lbs --resource-group-name {rg_name} --output json"
    )
    # ``null``/empty JSON output must not break the iteration below.
    load_balancers = json.loads(out) or []

    # Compare against IPs as well as the hostname: the endpoint above may be
    # an IP, which would never equal an LB hostname.
    matching_lb = next(
        (lb for lb in load_balancers if lb_hostname in _lb_endpoints(lb)),
        None,
    )
    if not matching_lb:
        logger.error(f"Could not find IBM Cloud VPC LB with hostname {lb_hostname}")
        return []

    # ``or []`` also covers a key that is present but null.
    security_groups = matching_lb.get("security_groups") or []
    if not security_groups:
        logger.warning(f"No security groups on LB {matching_lb.get('name')}")
    return security_groups


def configure_nfs_lb_security_group(
    svc_name=NFS_LB_SERVICE_NAME, namespace=None, port=NFS_PORT
):
    """
    Add an inbound TCP rule for the NFS port to the security groups
    attached to the NFS LoadBalancer on IBM Cloud VPC.

    Must be called after the LoadBalancer Service has an ingress address
    assigned. A failure to add the rule to one security group is logged
    and does not stop the remaining groups; a failure to look up the load
    balancer itself propagates.

    Args:
        svc_name (str): NFS LoadBalancer Service name
        namespace (str): Service namespace, defaults to
            ``constants.OPENSHIFT_STORAGE_NAMESPACE``
        port (int): TCP port to open (default: 2049)

    """
    namespace = namespace or constants.OPENSHIFT_STORAGE_NAMESPACE
    logger.info(f"Configuring NFS LB security group for port {port}")

    for sg in _get_lb_security_groups(svc_name, namespace):
        sg_name = sg.get("name")
        if not sg_name:
            logger.warning(f"Skipping security group without a name: {sg}")
            continue
        try:
            logger.info(f"Adding inbound TCP {port} to {sg_name}")
            add_security_group_rule(sg_name, "inbound", "tcp", port, port)
        except Exception as e:
            # Deliberately best-effort: the rule may already be present.
            logger.warning(
                f"Failed to add port {port} rule to {sg_name} "
                f"(may already exist): {e}"
            )

    logger.info("NFS LB security group configuration done")


def remove_nfs_lb_security_group_rules(
    svc_name=NFS_LB_SERVICE_NAME, namespace=None, port=NFS_PORT
):
    """
    Remove inbound TCP rules for the NFS port from the security groups
    attached to the NFS LoadBalancer on IBM Cloud VPC.

    Should be called before deleting the NFS LoadBalancer Service so
    the VPC LB is still present for look-up. Every step is best-effort:
    failures are logged as warnings so that the Service deletion that
    follows is never blocked by this cleanup.

    NOTE: every inbound TCP rule whose port range is exactly ``port`` is
    deleted, whether or not it was created by
    ``configure_nfs_lb_security_group``.

    Args:
        svc_name (str): NFS LoadBalancer Service name
        namespace (str): Service namespace, defaults to
            ``constants.OPENSHIFT_STORAGE_NAMESPACE``
        port (int): TCP port whose rules are removed (default: 2049)

    """
    namespace = namespace or constants.OPENSHIFT_STORAGE_NAMESPACE
    logger.info(f"Removing NFS LB security group rules for port {port}")

    try:
        security_groups = _get_lb_security_groups(svc_name, namespace)
    except Exception as e:
        # Same policy as the steps below: warn and carry on.
        logger.warning(f"Could not look up NFS LB security groups: {e}")
        security_groups = []

    for sg in security_groups:
        sg_id = sg.get("id")
        sg_name = sg.get("name")
        if not sg_id:
            logger.warning(f"Skipping security group without an id: {sg}")
            continue
        try:
            out = run_ibmcloud_cmd(f"ibmcloud is security-group {sg_id} --output json")
            sg_detail = json.loads(out) or {}
        except Exception as e:
            logger.warning(f"Could not fetch rules for {sg_name}: {e}")
            continue

        for rule in sg_detail.get("rules") or []:
            if not _is_inbound_tcp_rule_for_port(rule, port):
                continue
            rule_id = rule.get("id")
            logger.info(f"Deleting rule {rule_id} from {sg_name}")
            try:
                run_ibmcloud_cmd(
                    f"ibmcloud is security-group-rule-delete "
                    f"{sg_id} {rule_id} --force"
                )
            except Exception as e:
                logger.warning(f"Failed to delete rule {rule_id}: {e}")

    logger.info("NFS LB security group cleanup done")
def _mount_nfs_with_retry(self, cmd, tries=28, delay=10):
    """
    Run an NFS mount command on the client VM, retrying on failure.

    The command is executed through ``self.con.exec_cmd``; a non-zero
    return code is turned into ``CommandFailed`` so that ``retry`` can
    re-run it.

    Args:
        cmd (str): Mount command to execute on the NFS client VM
        tries (int): Value passed to ``retry`` as ``tries`` (default: 28)
        delay (int): Value passed to ``retry`` as ``delay``, in seconds
            (default: 10)

    Raises:
        CommandFailed: If mount does not succeed within the retry limit
    """

    @retry(CommandFailed, tries=tries, delay=delay)
    def _do_mount():
        # exec_cmd yields (return code, stdout, stderr); stdout is unused.
        exit_code, _stdout, err_output = self.con.exec_cmd(cmd)
        if exit_code == 0:
            return
        raise CommandFailed(
            f"NFS mount command failed with retcode {exit_code}: {err_output}"
        )

    _do_mount()
exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale") + stale_before = self.parse_subvolume_ls_output(output) + log.info(f"Stale subvolumes before delete: {stale_before}") - # Deleteing stale subvolume - exec_cmd( - cmd=f"{odf_cli_path} subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" + # Deleting stale subvolume + delete_output = exec_cmd( + cmd=f"{odf_cli_path} subvolume delete" + f" {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" ) + log.info(f"Subvolume delete output: {delete_output.stdout}") - # Checking for stale volumes + # Verify the specific subvolume was deleted output = exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale") - stale_volumes = self.parse_subvolume_ls_output(output) - assert len(stale_volumes) == 0 # No stale volumes available + stale_after = self.parse_subvolume_ls_output(output) + log.info(f"Stale subvolumes after delete: {stale_after}") + stale_svs = {sv[1] for sv in stale_after} + assert ( + new_pvc[1] not in stale_svs + ), f"Subvolume {new_pvc[1]} still stale after delete" # Delete ocs-storagecluster-ceph-nfs-retain storageclass self.sc_obj.delete(resource_name=self.retain_nfs_sc_name) @@ -1772,11 +1786,7 @@ def test_incluster_outcluster_nfs_export_for_non_default_nfs_sc( + self.test_folder ) - retry( - (CommandFailed), - tries=28, - delay=10, - )(self.con.exec_cmd(export_nfs_external_cmd)) + self._mount_nfs_with_retry(export_nfs_external_cmd) # Verify able to read exported volume command = f"cat {self.test_folder}/index.html"