diff --git a/ocs_ci/framework/pytest_customization/marks.py b/ocs_ci/framework/pytest_customization/marks.py index 0a2a1c2be11c..fdfa4aaf8120 100644 --- a/ocs_ci/framework/pytest_customization/marks.py +++ b/ocs_ci/framework/pytest_customization/marks.py @@ -547,6 +547,11 @@ # Marker for skipping tests for provider clusters based on OCS version skip_for_provider_if_ocs_version = pytest.mark.skip_for_provider_if_ocs_version +# Marker for skipping tests for provider or client clusters based on OCS version +skip_for_provider_or_client_if_ocs_version = ( + pytest.mark.skip_for_provider_or_client_if_ocs_version +) + skipif_rosa = pytest.mark.skipif( config.ENV_DATA["platform"].lower() == ROSA_PLATFORM, reason="Test will not run on ROSA cluster", diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 354ade13c6d0..a13fa2168199 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -3514,6 +3514,8 @@ NFS_CSI_PLUGIN_LABEL = "app=csi-nfsplugin" NFS_CSI_CTRLPLUGIN_LABEL_419 = "app=openshift-storage.nfs.csi.ceph.com-ctrlplugin" NFS_CSI_NODEPLUGIN_LABEL_419 = "app=openshift-storage.nfs.csi.ceph.com-nodeplugin" +NFS_STORAGECLASS_NAME = "ocs-storagecluster-ceph-nfs" +COPY_NFS_STORAGECLASS_NAME = "ocs-storagecluster-ceph-nfs-copy" # The expected mds cache memory values MDS_CACHE_MEMORY = 3221225472 LOWER_REQ_MDS_CACHE_MEMORY = 1073741824 diff --git a/ocs_ci/utility/ibmcloud.py b/ocs_ci/utility/ibmcloud.py index 55ae99e4f43e..acdf28dfdfb9 100644 --- a/ocs_ci/utility/ibmcloud.py +++ b/ocs_ci/utility/ibmcloud.py @@ -36,9 +36,7 @@ from ocs_ci.utility.utils import get_infra_id, get_ocp_version, run_cmd, TimeoutSampler from ocs_ci.ocs.node import get_nodes - -logger = logging.getLogger(name=__file__) -ibm_config = config.AUTH.get("ibmcloud", {}) +logger = logging.getLogger(__name__) def login(region=None, resource_group=None): @@ -50,6 +48,13 @@ def login(region=None, resource_group=None): resource_group (str): resource group to log in, if not specified it will use one from config or nothing if not defined """ + platform = config.ENV_DATA["platform"] + if platform != constants.IBMCLOUD_PLATFORM: + logger.info( + f"Skipping IBM Cloud login as platform: {platform} is not IBM Cloud" + ) + return + ibm_config = config.AUTH.get("ibmcloud", {}) api_key = ibm_config["api_key"] login_cmd = f"ibmcloud login --apikey {api_key}" account_id = ibm_config.get("account_id") @@ -85,7 +90,9 @@ def set_region(region=None): region (str): region to set, if not defined it will try to get from metadata.json """ - if not config.ENV_DATA.get("enable_region_dynamic_switching"): + if not config.ENV_DATA.get("enable_region_dynamic_switching") or ( + config.ENV_DATA["platform"] != constants.IBMCLOUD_PLATFORM + ): return if not region: region = get_region(config.ENV_DATA["cluster_path"]) @@ -127,6 +134,27 @@ def get_region(cluster_path): return metadata["ibmcloud"]["region"] +def get_ibmcloud_cluster_region(): + """ + Get IBM Cloud region from the cluster's infrastructure object. + + This function queries the live cluster to retrieve the IBM Cloud region + from the infrastructure status, which may differ from the metadata.json file. + + Returns: + str: IBM Cloud region where the cluster is deployed + + Raises: + CommandFailed: If the oc command fails + """ + ocp_obj = OCP() + region = ocp_obj.exec_oc_cmd( + "get infrastructure cluster -o jsonpath='{.status.platformStatus.ibmcloud.location}'" + ) + logger.info(f"IBM Cloud cluster region: {region}") + return region.strip() + + def get_resource_group_name(cluster_path): """ Get resource group from metadata.json in given cluster_path @@ -505,7 +533,7 @@ def restart_nodes(self, nodes, timeout=900, wait=True): for node in nodes: worker_id = node.get()["spec"]["providerID"].split("/")[-1] - cmd = f"ibmcloud ks worker reboot --cluster {cluster_id} --worker {worker_id} -f --hard" + cmd = f"ibmcloud ks worker reboot --cluster {cluster_id} --worker {worker_id} -f" out = run_ibmcloud_cmd(cmd) logger.info(f"Node restart command output: {out}") @@ -734,7 +762,7 @@ def restart_nodes_by_stop_and_start(self, nodes, force=True): if len(worker_nodes_not_ready) > 0: for not_ready_node in worker_nodes_not_ready: - cmd = f"ibmcloud ks worker reboot --cluster {cluster_id} --worker {not_ready_node} -f --hard" + cmd = f"ibmcloud ks worker reboot --cluster {cluster_id} --worker {not_ready_node} -f" out = run_ibmcloud_cmd(cmd) logger.info(f"Node restart command output: {out}") @@ -1150,7 +1178,9 @@ def delete_account_policy(policy_id, token=None): token = get_api_token() url = f"https://iam.cloud.ibm.com/v1/policies/{policy_id}" headers = {"Authorization": f"Bearer {token}"} - response = requests.delete(url.format("YOUR_POLICY_ID_HERE"), headers=headers) + response = requests.delete( + url.format("YOUR_POLICY_ID_HERE"), headers=headers, timeout=120 + ) if response.status_code == 204: # 204 means success (No Content) logger.info(f"Policy id: {policy_id} deleted successfully.") else: @@ -1208,18 +1238,70 @@ def create_vpc(cluster_name, resource_group): ) -def get_used_subnets(): +def get_used_subnets(vpc_id=""): """ Get currently used subnets in IBM Cloud + Args: + vpc_id (str): VPC ID to filter subnets. Empty string means all VPCs. + Returns: list: subnets """ - subnets_data = json.loads(run_ibmcloud_cmd("ibmcloud is subnets --output json")) + subnets_data = json.loads( + run_ibmcloud_cmd(f"ibmcloud is subnets --vpc '{vpc_id}' --output json") + ) return [subnet["ipv4_cidr_block"] for subnet in subnets_data] +def get_security_groups(vpc_id="", resource_group_id="", resource_group_name=""): + """ + Get security groups in IBM Cloud + + Args: + vpc_id (str): VPC ID to filter security groups, if empty it will return security groups for all VPCs. + resource_group_id (str): Resource group ID to filter security groups. + resource_group_name (str): Resource group name to filter security groups. + + Returns: + list: security groups + + """ + cmd = "ibmcloud is security-groups --output json" + if vpc_id: + cmd += f" --vpc '{vpc_id}'" + if resource_group_id: + cmd += f" --resource-group-id '{resource_group_id}'" + if resource_group_name: + cmd += f" --resource-group-name '{resource_group_name}'" + sg_data = json.loads(run_ibmcloud_cmd(cmd)) + return sg_data + + +def get_security_group_id( + sg_name, vpc_id="", resource_group_id="", resource_group_name="" +): + """ + Get security group ID by name + + Args: + sg_name (str): security group name + vpc_id (str): VPC ID to filter security groups, if empty it will return security groups for all VPCs. + resource_group_id (str): Resource group ID to filter security groups. + resource_group_name (str): Resource group name to filter security groups + + Returns: + str: security group ID or empty string if not found + + """ + sgs = get_security_groups(vpc_id, resource_group_id, resource_group_name) + for sg in sgs: + if sg["name"] == sg_name: + return sg["id"] + return "" + + def add_security_group_rule( security_group, direction, protocol, port_min, port_max, **kwargs ): @@ -1245,6 +1327,53 @@ def add_security_group_rule( run_ibmcloud_cmd(cmd) +def get_security_group_name_by_pattern( + sg_name_pattern, vpc_id="", resource_group_id="", resource_group_name="" +): + """ + Get security group name by pattern + + Args: + sg_name_pattern (str): security group name pattern (regular expression) + vpc_id (str): VPC ID to filter security groups, if empty it will return security groups for all VPCs + resource_group_id (str): Resource group ID to filter security groups + resource_group_name (str): Resource group name to filter security groups + + Returns: + str: security group name or empty string if not found + + """ + sgs = get_security_groups(vpc_id, resource_group_id, resource_group_name) + for sg in sgs: + if re.search(sg_name_pattern, sg["name"]): + return sg["name"] + return "" + + +def open_ports_on_ibmcloud_hub_cluster(): + """ + Add the inbound rules for these cluster security configs `-cluster-wide` and `-openshift-net` to open the + following ports: 3300, 6789, 9283, 6800-7300 and 31659 + """ + rg_name = get_resource_group_name(config.ENV_DATA["cluster_path"]) + sg_names = [ + get_security_group_name_by_pattern( + r"cluster-wide$", resource_group_name=rg_name + ), + get_security_group_name_by_pattern( + r"openshift-net$", resource_group_name=rg_name + ), + ] + + for sg_name in sg_names: + add_security_group_rule(sg_name, "inbound", "tcp", 3300, 3300) + add_security_group_rule(sg_name, "inbound", "tcp", 6789, 6789) + add_security_group_rule(sg_name, "inbound", "tcp", 9283, 9283) + add_security_group_rule(sg_name, "inbound", "tcp", 6800, 7300) + add_security_group_rule(sg_name, "inbound", "tcp", 31659, 31659) + logger.info("Inbound rules added successfully") + + def configure_ingress_load_balancer_security_group(): """ Add inbound rules for ports 80 and 443 to the default ingress load balancer security group. @@ -1347,6 +1476,134 @@ def configure_ingress_load_balancer_security_group(): raise +def _get_lb_security_groups(svc_name, namespace): + """ + Look up the IBM Cloud VPC load balancer backing a Kubernetes + LoadBalancer Service and return its security groups. + + Args: + svc_name (str): Kubernetes Service name + namespace (str): Kubernetes namespace + + Returns: + list: security group dicts from the VPC LB, empty list on + failure + + """ + rg_name = get_resource_group_name(config.ENV_DATA["cluster_path"]) + + svc_ocp = OCP( + kind="Service", + namespace=namespace, + resource_name=svc_name, + ) + svc_data = svc_ocp.get() + + lb_ingress = svc_data.get("status", {}).get("loadBalancer", {}).get("ingress", []) + if not lb_ingress: + logger.warning( + f"No LB ingress on service {svc_name}, cannot configure " f"security group" + ) + return [] + + lb_hostname = lb_ingress[0].get("hostname") or lb_ingress[0].get("ip") + if not lb_hostname: + logger.warning(f"No hostname/IP in LB ingress for service {svc_name}") + return [] + + logger.debug(f"LB endpoint for {svc_name}: {lb_hostname}") + + cmd = f"ibmcloud is lbs --resource-group-name {rg_name} " f"--output json" + out = run_ibmcloud_cmd(cmd) + load_balancers = json.loads(out) + + matching_lb = None + for lb in load_balancers: + if lb.get("hostname") == lb_hostname: + matching_lb = lb + break + + if not matching_lb: + logger.error(f"Could not find IBM Cloud VPC LB with hostname {lb_hostname}") + return [] + + security_groups = matching_lb.get("security_groups", []) + if not security_groups: + logger.warning(f"No security groups on LB {matching_lb.get('name')}") + return security_groups + + +def configure_nfs_lb_security_group(): + """ + Add an inbound TCP rule for port 2049 (NFS) to the security + groups attached to the NFS LoadBalancer on IBM Cloud VPC. + + Must be called after the ``rook-ceph-nfs-my-nfs-load-balancer`` + Service has an ingress address assigned. + """ + svc_name = "rook-ceph-nfs-my-nfs-load-balancer" + namespace = constants.OPENSHIFT_STORAGE_NAMESPACE + logger.info("Configuring NFS LB security group for port 2049") + + security_groups = _get_lb_security_groups(svc_name, namespace) + for sg in security_groups: + sg_name = sg.get("name") + try: + logger.info(f"Adding inbound TCP 2049 to {sg_name}") + add_security_group_rule(sg_name, "inbound", "tcp", 2049, 2049) + except Exception as e: + logger.warning( + f"Failed to add port 2049 rule to {sg_name} " + f"(may already exist): {e}" + ) + + logger.info("NFS LB security group configuration done") + + +def remove_nfs_lb_security_group_rules(): + """ + Remove inbound TCP 2049 rules from the security groups attached + to the NFS LoadBalancer on IBM Cloud VPC. + + Should be called before deleting the NFS LoadBalancer Service so + the VPC LB is still present for look-up. + """ + svc_name = "rook-ceph-nfs-my-nfs-load-balancer" + namespace = constants.OPENSHIFT_STORAGE_NAMESPACE + logger.info("Removing NFS LB security group rules for port 2049") + + security_groups = _get_lb_security_groups(svc_name, namespace) + for sg in security_groups: + sg_id = sg.get("id") + sg_name = sg.get("name") + try: + cmd = f"ibmcloud is security-group {sg_id} " f"--output json" + out = run_ibmcloud_cmd(cmd) + sg_detail = json.loads(out) + except Exception as e: + logger.warning(f"Could not fetch rules for {sg_name}: {e}") + continue + + for rule in sg_detail.get("rules", []): + if ( + rule.get("direction") == "inbound" + and rule.get("protocol") == "tcp" + and rule.get("port_min") == 2049 + and rule.get("port_max") == 2049 + ): + rule_id = rule.get("id") + logger.info(f"Deleting rule {rule_id} from {sg_name}") + try: + run_ibmcloud_cmd( + f"ibmcloud is security-group-rule-delete " + f"{sg_id} {rule_id} --force" + ) + except Exception as e: + logger.warning(f"Failed to delete rule {rule_id}: {e}") + + logger.info("NFS LB security group cleanup done") + + def create_address_prefix(prefix_name, vpc, zone, cidr): """ Create address prefix in VPC. diff --git a/ocs_ci/utility/nfs_utils.py b/ocs_ci/utility/nfs_utils.py index 92bd99760274..b5827f7c0ad1 100644 --- a/ocs_ci/utility/nfs_utils.py +++ b/ocs_ci/utility/nfs_utils.py @@ -190,13 +190,22 @@ def create_nfs_load_balancer_service( if "hostname" in host_details: hostname_add = host_details["hostname"] log.info("ingress hostname, %s", hostname_add) - return hostname_add elif "ip" in host_details: - host_ip = host_details["ip"] - log.info("ingress host ip, %s", host_ip) - return host_ip + hostname_add = host_details["ip"] + log.info(f"ingress host ip, {hostname_add}") else: log.error("host details unavailable") + return None + + platform = config.ENV_DATA.get("platform", "").lower() + if platform == constants.IBMCLOUD_PLATFORM: + from ocs_ci.utility.ibmcloud import ( + configure_nfs_lb_security_group, + ) + + configure_nfs_lb_security_group() + + return hostname_add def update_etc_hosts_on_nfs_client(con, hostname): @@ -291,6 +300,14 @@ def delete_nfs_load_balancer_service( ) return + platform = config.ENV_DATA.get("platform", "").lower() + if platform == constants.IBMCLOUD_PLATFORM: + from ocs_ci.utility.ibmcloud import ( + remove_nfs_lb_security_group_rules, + ) + + remove_nfs_lb_security_group_rules() + log.info("Deleting NFS LoadBalancer service %s", svc_name) storage_cluster_obj.exec_oc_cmd(f"delete service {svc_name}") diff --git a/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py b/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py index 67e98a54230d..89ea8631c690 100644 --- a/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py +++ b/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py @@ -5,6 +5,7 @@ import os import socket +from subprocess import CompletedProcess from ocs_ci.utility import nfs_utils from ocs_ci.utility.utils import exec_cmd from ocs_ci.framework import config @@ -25,17 +26,23 @@ tier4c, skipif_ocp_version, skipif_managed_service, - skip_for_provider_if_ocs_version, + skip_for_provider_or_client_if_ocs_version, skipif_disconnected_cluster, skipif_proxy_cluster, polarion_id, nfs_outcluster_test_platform_required, skipif_external_mode, + skipif_hci_client, + hci_client_required, ) - +from ocs_ci.utility import version as version_module from ocs_ci.ocs.resources import pod, ocs from ocs_ci.utility.retry import retry from ocs_ci.ocs.exceptions import CommandFailed, ConfigurationError +from ocs_ci.ocs.resources.pod import ( + get_all_pods, +) +from ocs_ci.utility.nfs_utils import provisioner_selectors log = logging.getLogger(__name__) # Error message to look in a command output @@ -49,7 +56,8 @@ @skipif_managed_service @skipif_disconnected_cluster @skipif_proxy_cluster -@skip_for_provider_if_ocs_version("<4.19") +@skipif_hci_client +@skip_for_provider_or_client_if_ocs_version("<4.19") @skipif_lean_deployment @polarion_id("OCS-4270") class TestDefaultNfsDisabled(ManageTest): @@ -64,17 +72,44 @@ def test_nfs_not_enabled_by_default(self): Steps: 1:- Check cephnfs resources not available by default - + 2:- Validate no NFS pods exist """ + storage_cluster_obj = ocp.OCP( kind="Storagecluster", namespace=config.ENV_DATA["cluster_namespace"] ) # Checks cephnfs resources not available by default + log.info("Checking if any cephnfs resources exist") cephnfs_resource = storage_cluster_obj.exec_oc_cmd("get cephnfs") if cephnfs_resource is None: - log.info("No resources found in openshift-storage namespace.") + log.info("No cephnfs resources found. NFS should be disabled.") + pod_objs = get_all_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) + nfs_pod_selectors = provisioner_selectors(nfs_plugins=True) + + nfs_pods = [] + + for p in pod_objs: + labels = p.get().get("metadata", {}).get("labels", {}) + + for selector in nfs_pod_selectors: + key, value = selector.split("=", 1) + + if labels.get(key) == value: + nfs_pods.append(p.name) + break + + if nfs_pods: + unique_nfs_pods = list(set(nfs_pods)) + pytest.fail( + f"NFS pods found when NFS should be disabled: {unique_nfs_pods}" + ) + else: + log.info("No NFS pods found. NFS is correctly disabled.") + else: - log.error("nfs feature is enabled by default") + pytest.fail( + "cephnfs resources exist. NFS is unexpectedly enabled by default." + ) @brown_squad @@ -83,7 +118,7 @@ def test_nfs_not_enabled_by_default(self): @skipif_ocs_version("<4.11") @skipif_ocp_version("<4.11") @skipif_managed_service -@skip_for_provider_if_ocs_version("<4.19") +@skip_for_provider_or_client_if_ocs_version("<4.19") @skipif_disconnected_cluster @skipif_proxy_cluster @skipif_lean_deployment @@ -94,7 +129,10 @@ class TestNfsEnable(ManageTest): """ @pytest.fixture(scope="class", autouse=True) - def setup_teardown(self, request): + def setup_teardown( + self, + request, + ): """ Setup-Teardown for the class @@ -142,7 +180,7 @@ def setup_teardown(self, request): self.service_obj = ocp.OCP(kind=constants.SERVICE, namespace=self.namespace) self.pvc_obj = ocp.OCP(kind=constants.PVC, namespace=self.namespace) self.pv_obj = ocp.OCP(kind=constants.PV, namespace=self.namespace) - self.nfs_sc = "ocs-storagecluster-ceph-nfs" + self.nfs_sc = constants.NFS_STORAGECLASS_NAME self.sc = ocs.OCS(kind=constants.STORAGECLASS, metadata={"name": self.nfs_sc}) self.retain_nfs_sc_name = "ocs-storagecluster-ceph-nfs-retain" platform = config.ENV_DATA.get("platform", "").lower() @@ -162,42 +200,77 @@ def setup_teardown(self, request): # Enable nfs feature log.info("----Enable nfs----") - nfs_ganesha_pod_name = nfs_utils.nfs_enable( - self.storage_cluster_obj, - self.config_map_obj, - self.pod_obj, - self.namespace, - ) - if ( - platform == constants.AWS_PLATFORM - or platform == constants.IBMCLOUD_PLATFORM - or platform == constants.HCI_BAREMETAL + config.default_cluster_ctx.ENV_DATA["cluster_type"].lower() + == constants.HCI_CLIENT ): - # Create loadbalancer service for nfs - self.hostname_add = nfs_utils.create_nfs_load_balancer_service( + nfs_ganesha_pod, self.hostname_add = nfs_utils.nfs_access_for_clients( + self.nfs_sc + ) + + # Create a duplicate sc of nfs-sc and update the server details with hostname_add + if ( + version_module.get_semantic_ocs_version_from_config() + < version_module.VERSION_4_21 + ): + _ = nfs_utils.create_nfs_sc( + sc_name_to_create=constants.COPY_NFS_STORAGECLASS_NAME, + sc_name_to_copy=self.nfs_sc, + server=self.hostname_add, + ) + self.nfs_sc = constants.COPY_NFS_STORAGECLASS_NAME + yield + # Remove NFS SC from distributed storage classes on the provider + nfs_utils.remove_nfs_storage_class_from_all_consumers( + constants.NFS_STORAGECLASS_NAME + ) + # Disable nfs feature + nfs_utils.disable_nfs_service_from_provider(self.sc, nfs_ganesha_pod) + + # delete nfs non default storageclass if available + if ocp.OCP(kind=constants.STORAGECLASS).is_exist( + resource_name=constants.COPY_NFS_STORAGECLASS_NAME + ): + self.sc_obj.delete(resource_name=constants.COPY_NFS_STORAGECLASS_NAME) + + else: + nfs_ganesha_pod_name = nfs_utils.nfs_enable( self.storage_cluster_obj, + self.config_map_obj, + self.pod_obj, + self.namespace, ) - yield - log.info("-----Teardown-----") - # Disable nfs feature - nfs_utils.nfs_disable( - self.storage_cluster_obj, - self.config_map_obj, - self.pod_obj, - self.sc, - nfs_ganesha_pod_name, - ) - if ( - platform == constants.AWS_PLATFORM - or platform == constants.IBMCLOUD_PLATFORM - or platform == constants.HCI_BAREMETAL - ): - # Delete ocs nfs Service - nfs_utils.delete_nfs_load_balancer_service( + if ( + platform == constants.AWS_PLATFORM + or platform == constants.IBMCLOUD_PLATFORM + or platform == constants.HCI_BAREMETAL + ): + # Create loadbalancer service for nfs + self.hostname_add = nfs_utils.create_nfs_load_balancer_service( + self.storage_cluster_obj, + ) + + yield + + log.info("-----Teardown-----") + # Disable nfs feature + nfs_utils.nfs_disable( self.storage_cluster_obj, + self.config_map_obj, + self.pod_obj, + self.sc, + nfs_ganesha_pod_name, ) + if ( + platform == constants.AWS_PLATFORM + or platform == constants.IBMCLOUD_PLATFORM + or platform == constants.HCI_BAREMETAL + ): + # Delete ocs nfs Service + nfs_utils.delete_nfs_load_balancer_service( + self.storage_cluster_obj, + ) def teardown(self): """ @@ -290,8 +363,31 @@ def __make_connection(): nfs_utils.update_etc_hosts_on_nfs_client(con, hostname_add) return con + def _mount_nfs_with_retry(self, cmd, tries=28, delay=10): + """ + Execute an NFS mount command on the client VM with retry. + + Args: + cmd (str): Mount command to execute on the NFS client VM + tries (int): Number of retry attempts (default: 28) + delay (int): Delay in seconds between retries (default: 10) + + Raises: + CommandFailed: If mount does not succeed within the retry limit + """ + + def _do_mount(): + retcode, _, stderr = self.con.exec_cmd(cmd) + if retcode != 0: + raise CommandFailed( + f"NFS mount command failed with retcode " f"{retcode}: {stderr}" + ) + + retry((CommandFailed), tries=tries, delay=delay)(_do_mount)() + @tier1 @polarion_id("OCS-4269") + @skipif_hci_client def test_nfs_feature_enable( self, ): @@ -499,11 +595,7 @@ def test_outcluster_nfs_export( + self.test_folder ) - retry( - (CommandFailed), - tries=28, - delay=10, - )(self.con.exec_cmd(export_nfs_external_cmd)) + self._mount_nfs_with_retry(export_nfs_external_cmd) # Verify able to read exported volume command = f"cat {self.test_folder}/index.html" @@ -671,11 +763,7 @@ def test_multiple_nfs_based_PVs( + " " + self.test_folder ) - retry( - (CommandFailed), - tries=28, - delay=10, - )(self.con.exec_cmd(export_nfs_external_cmd)) + self._mount_nfs_with_retry(export_nfs_external_cmd) # Verify able to access exported volume command = f"cat {self.test_folder}/index.html" @@ -804,11 +892,7 @@ def test_multiple_mounts_of_same_nfs_volume( + " " + self.test_folder ) - retry( - (CommandFailed), - tries=28, - delay=10, - )(self.con.exec_cmd(export_nfs_external_cmd)) + self._mount_nfs_with_retry(export_nfs_external_cmd) # Verify able to access exported volume command = f"cat {self.test_folder}/shared_file.html" @@ -937,11 +1021,7 @@ def test_external_nfs_client_can_write_read_new_file( + " " + self.test_folder ) - retry( - (CommandFailed), - tries=28, - delay=10, - )(self.con.exec_cmd(export_nfs_external_cmd)) + self._mount_nfs_with_retry(export_nfs_external_cmd) # Verify able to write new file in exported volume by external client command = ( @@ -1074,6 +1154,7 @@ def test_nfs_volume_with_different_accesss_mode( @tier4c @polarion_id("OCS-4284") + @skipif_hci_client def test_respin_of_nfs_plugin_pods_for_incluster_consumer( self, pod_factory, @@ -1330,6 +1411,7 @@ def test_respin_app_pod_exported_nfs_volume_incluster( @tier4c @polarion_id("OCS-4294") + @skipif_hci_client def test_respin_of_cephfs_plugin_provisioner_pods_for_incluster_consumer( self, pod_factory, @@ -1435,10 +1517,12 @@ def test_respin_of_cephfs_plugin_provisioner_pods_for_incluster_consumer( @tier2 @polarion_id("OCS-6193") + @skipif_hci_client def test_nfs_pvc_subvolume_deletion( self, pod_factory, pvc_factory, + odf_cli_setup, ): """ This test is to validate NFS export using a PVC mounted on an app pod (in-cluster) and subvolume @@ -1538,16 +1622,24 @@ def test_nfs_pvc_subvolume_deletion( # Checking for stale volumes output = exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale") + stale_before = self.parse_subvolume_ls_output(output) + log.info(f"Stale subvolumes before delete: {stale_before}") - # Deleteing stale subvolume - exec_cmd( - cmd=f"{odf_cli_path} subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" + # Deleting stale subvolume + delete_output = exec_cmd( + cmd=f"{odf_cli_path} subvolume delete" + f" {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" ) + log.info(f"Subvolume delete output: {delete_output.stdout}") - # Checking for stale volumes + # Verify the specific subvolume was deleted output = exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale") - stale_volumes = self.parse_subvolume_ls_output(output) - assert len(stale_volumes) == 0 # No stale volumes available + stale_after = self.parse_subvolume_ls_output(output) + log.info(f"Stale subvolumes after delete: {stale_after}") + stale_svs = {sv[1] for sv in stale_after} + assert ( + new_pvc[1] not in stale_svs + ), f"Subvolume {new_pvc[1]} still stale after delete" # Delete ocs-storagecluster-ceph-nfs-retain storageclass self.sc_obj.delete(resource_name=self.retain_nfs_sc_name) @@ -1555,9 +1647,210 @@ def test_nfs_pvc_subvolume_deletion( self.sc_obj.wait_for_delete(resource_name=self.retain_nfs_sc_name) def parse_subvolume_ls_output(self, output): + if isinstance(output, CompletedProcess): + output = output.stdout.decode("utf-8") subvolumes = [] subvolumes_list = output.strip().split("\n")[1:] for item in subvolumes_list: - fs, sv, svg, status = item.split(" ") + if not item.strip(): + continue + fs, sv, svg, status = item.split() subvolumes.append((fs, sv, svg, status)) return subvolumes + + @tier1 + @skipif_ocs_version("<4.21") + @hci_client_required + def test_default_nfs_server_details_displayed_if_external_endpoint_details_unavailable( + self, + ): + """ + Verify if nfs.externalEndpoint is unavailable in StorageCluster then NFS server endpoint details + will not be available when distributing NFS SC with clients, default server will be displayed + + """ + # remove nfs external endpoint details from storagecluster + nfs_utils.remove_nfs_endpoint_details() + + server = nfs_utils.fetch_nfs_server_details_on_client_cluster( + default_server=True + ) + # validate default nfs server details is displayed + assert ( + server == constants.NFS_DEFAULT_SERVICE_NAME + ), f"Expected default NFS server service, got: {server}" + + # Update nfs external endpoint details in storagecluster + # switch to provider + config.switch_to_provider() + nfs_utils.update_nfs_endpoint(self.hostname_add) + + @tier1 + @nfs_outcluster_test_platform_required + @skipif_ocs_version("<4.21") + # @polarion_id("OCS-4272") + def test_incluster_outcluster_nfs_export_for_non_default_nfs_sc( + self, + pod_factory, + ): + """ + This test is to validate NFS incluster and outcluster exports using + for non default nfs storageclass + + Steps: + 1:- Create a new nfs storageclass + and create nfs pvcs with the storageclass + 2:- Create pods with nfs pvcs mounted + 3:- Run IO + 4:- Wait for IO completion + 5:- Verify presence of the file + 6:- Deletion of Pods and PVCs + + """ + nfs_utils.skip_test_if_nfs_client_unavailable(self.nfs_client_ip) + + _ = nfs_utils.create_nfs_sc( + sc_name_to_create=constants.COPY_NFS_STORAGECLASS_NAME, + sc_name_to_copy=self.nfs_sc, + server=self.hostname_add, + ) + self.nfs_sc = constants.COPY_NFS_STORAGECLASS_NAME + + # Create nfs pvcs with storageclass ocs-storagecluster-ceph-nfs + nfs_pvc_obj = helpers.create_pvc( + sc_name=self.nfs_sc, + namespace=self.namespace, + size="5Gi", + do_reload=True, + access_mode=constants.ACCESS_MODE_RWO, + volume_mode="Filesystem", + ) + + # Create nginx pod with nfs pvcs mounted + pod_obj = pod_factory( + interface=constants.CEPHFILESYSTEM, + pvc=nfs_pvc_obj, + status=constants.STATUS_RUNNING, + ) + # Fetch sharing details for the nfs pvc + fetch_vol_name_cmd = ( + "get pvc " + nfs_pvc_obj.name + " --output jsonpath='{.spec.volumeName}'" + ) + vol_name = self.pvc_obj.exec_oc_cmd(fetch_vol_name_cmd) + log.info(f"For pvc {nfs_pvc_obj.name} volume name is, {vol_name}") + fetch_pv_share_cmd = ( + "get pv " + + vol_name + + " --output jsonpath='{.spec.csi.volumeAttributes.share}'" + ) + share_details = self.pv_obj.exec_oc_cmd(fetch_pv_share_cmd) + log.info(f"Share details is, {share_details}") + + file_name = pod_obj.name + # Run IO + pod_obj.run_io( + storage_type="fs", + size="4G", + fio_filename=file_name, + runtime=60, + ) + log.info("IO started on all pods") + + # Wait for IO completion + fio_result = pod_obj.get_fio_results() + log.info("IO completed on all pods") + err_count = fio_result.get("jobs")[0].get("error") + assert err_count == 0, ( + f"IO error on pod {pod_obj.name}. " f"FIO result: {fio_result}" + ) + # Verify presence of the file + file_path = pod.get_file_path(pod_obj, file_name) + log.info(f"Actual file path on the pod {file_path}") + assert pod.check_file_existence( + pod_obj, file_path + ), f"File {file_name} doesn't exist" + log.info(f"File {file_name} exists in {pod_obj.name}") + + # Create /var/lib/www/html/index.html file inside the pod + command = ( + "bash -c " + + '"echo ' + + "'hello world'" + + ' > /var/lib/www/html/index.html"' + ) + pod_obj.exec_cmd_on_pod( + command=command, + out_yaml_format=False, + ) + retcode, _, _ = self.con.exec_cmd("mkdir -p " + self.test_folder) + assert retcode == 0 + export_nfs_external_cmd = ( + "mount -t nfs4 -o proto=tcp " + + self.hostname_add + + ":" + + share_details + + " " + + self.test_folder + ) + + self._mount_nfs_with_retry(export_nfs_external_cmd) + + # Verify able to read exported volume + command = f"cat {self.test_folder}/index.html" + retcode, stdout, _ = self.con.exec_cmd(command) + stdout = stdout.rstrip() + log.info(stdout) + assert stdout == "hello world" + command = f"chmod 666 {self.test_folder}/index.html" + retcode, _, _ = self.con.exec_cmd(command) + assert retcode == 0 + + # Verify able to write to the exported volume + command = ( + "bash -c " + + '"echo ' + + "'test_writing'" + + f' >> {self.test_folder}/index.html"' + ) + retcode, _, stderr = self.con.exec_cmd(command) + assert retcode == 0, f"failed with error---{stderr}" + + command = f"cat {self.test_folder}/index.html" + retcode, stdout, _ = self.con.exec_cmd(command) + assert retcode == 0 + stdout = stdout.rstrip() + assert stdout == "hello world" + """\n""" + "test_writing" + + # Able to read updated /var/lib/www/html/index.html file from inside the pod + command = "bash -c " + '"cat ' + ' /var/lib/www/html/index.html"' + result = pod_obj.exec_cmd_on_pod( + command=command, + out_yaml_format=False, + ) + assert result.rstrip() == "hello world" + """\n""" + "test_writing" + + # Unmount + nfs_utils.unmount(self.con, self.test_folder) + + # Deletion of Pods and PVCs + log.info("Deleting pod") + pod_obj.delete() + pod_obj.ocp.wait_for_delete( + pod_obj.name, 180 + ), f"Pod {pod_obj.name} is not deleted" + + pv_obj = nfs_pvc_obj.backed_pv_obj + log.info(f"pv object-----{pv_obj}") + + log.info("Deleting PVC") + nfs_pvc_obj.delete() + nfs_pvc_obj.ocp.wait_for_delete( + resource_name=nfs_pvc_obj.name + ), f"PVC {nfs_pvc_obj.name} is not deleted" + log.info(f"Verified: PVC {nfs_pvc_obj.name} is deleted.") + + log.info("Check nfs pv is deleted") + pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=180) + + log.info("delete non default nfs storageclass created for the test") + self.sc_obj.delete(resource_name=constants.COPY_NFS_STORAGECLASS_NAME)