Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 128 additions & 1 deletion ocs_ci/utility/ibmcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
from ocs_ci.utility.utils import get_infra_id, get_ocp_version, run_cmd, TimeoutSampler
from ocs_ci.ocs.node import get_nodes


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -1477,6 +1476,134 @@ def configure_ingress_load_balancer_security_group():
raise


def _get_lb_security_groups(svc_name, namespace):
    """
    Look up the IBM Cloud VPC load balancer backing a Kubernetes
    LoadBalancer Service and return its security groups.

    Args:
        svc_name (str): Kubernetes Service name
        namespace (str): Kubernetes namespace

    Returns:
        list: security group dicts from the VPC LB, empty list on
        failure

    """
    rg_name = get_resource_group_name(config.ENV_DATA["cluster_path"])

    svc_ocp = OCP(
        kind="Service",
        namespace=namespace,
        resource_name=svc_name,
    )
    svc_data = svc_ocp.get()

    # The cloud provider populates status.loadBalancer.ingress only once
    # the VPC LB is provisioned; without it there is nothing to look up.
    lb_ingress = svc_data.get("status", {}).get("loadBalancer", {}).get("ingress", [])
    if not lb_ingress:
        logger.warning(
            f"No LB ingress on service {svc_name}, cannot configure security group"
        )
        return []

    lb_hostname = lb_ingress[0].get("hostname") or lb_ingress[0].get("ip")
    if not lb_hostname:
        logger.warning(f"No hostname/IP in LB ingress for service {svc_name}")
        return []

    logger.debug(f"LB endpoint for {svc_name}: {lb_hostname}")

    cmd = f"ibmcloud is lbs --resource-group-name {rg_name} --output json"
    out = run_ibmcloud_cmd(cmd)
    load_balancers = json.loads(out)

    # Match the Service's ingress endpoint against the VPC LB list.
    matching_lb = next(
        (lb for lb in load_balancers if lb.get("hostname") == lb_hostname),
        None,
    )
    if not matching_lb:
        logger.error(f"Could not find IBM Cloud VPC LB with hostname {lb_hostname}")
        return []

    security_groups = matching_lb.get("security_groups", [])
    if not security_groups:
        logger.warning(f"No security groups on LB {matching_lb.get('name')}")
    # Return unconditionally: a non-empty list goes back to the caller,
    # and the empty-list case matches the documented "empty on failure"
    # contract (previously only the empty branch hit an explicit return).
    return security_groups


def configure_nfs_lb_security_group():
    """
    Add an inbound TCP rule for port 2049 (NFS) to the security
    groups attached to the NFS LoadBalancer on IBM Cloud VPC.

    Must be called after the ``rook-ceph-nfs-my-nfs-load-balancer``
    Service has an ingress address assigned.
    """
    service_name = "rook-ceph-nfs-my-nfs-load-balancer"
    ns = constants.OPENSHIFT_STORAGE_NAMESPACE
    logger.info("Configuring NFS LB security group for port 2049")

    for group in _get_lb_security_groups(service_name, ns):
        group_name = group.get("name")
        try:
            logger.info(f"Adding inbound TCP 2049 to {group_name}")
            add_security_group_rule(group_name, "inbound", "tcp", 2049, 2049)
        except Exception as exc:
            # Best-effort: a rule that already exists raises here as well.
            logger.warning(
                f"Failed to add port 2049 rule to {group_name} "
                f"(may already exist): {exc}"
            )

    logger.info("NFS LB security group configuration done")


def remove_nfs_lb_security_group_rules():
    """
    Remove inbound TCP 2049 rules from the security groups attached
    to the NFS LoadBalancer on IBM Cloud VPC.

    Should be called before deleting the NFS LoadBalancer Service so
    the VPC LB is still present for look-up.
    """
    service_name = "rook-ceph-nfs-my-nfs-load-balancer"
    ns = constants.OPENSHIFT_STORAGE_NAMESPACE
    logger.info("Removing NFS LB security group rules for port 2049")

    def _is_nfs_rule(rule):
        # Inbound TCP covering exactly port 2049.
        return (
            rule.get("direction") == "inbound"
            and rule.get("protocol") == "tcp"
            and rule.get("port_min") == 2049
            and rule.get("port_max") == 2049
        )

    for group in _get_lb_security_groups(service_name, ns):
        group_id = group.get("id")
        group_name = group.get("name")
        try:
            detail = json.loads(
                run_ibmcloud_cmd(
                    f"ibmcloud is security-group {group_id} --output json"
                )
            )
        except Exception as exc:
            logger.warning(f"Could not fetch rules for {group_name}: {exc}")
            continue

        for rule in detail.get("rules", []):
            if not _is_nfs_rule(rule):
                continue
            rule_id = rule.get("id")
            logger.info(f"Deleting rule {rule_id} from {group_name}")
            try:
                run_ibmcloud_cmd(
                    f"ibmcloud is security-group-rule-delete "
                    f"{group_id} {rule_id} --force"
                )
            except Exception as exc:
                logger.warning(f"Failed to delete rule {rule_id}: {exc}")

    logger.info("NFS LB security group cleanup done")


def create_address_prefix(prefix_name, vpc, zone, cidr):
"""
Create address prefix in VPC.
Expand Down
25 changes: 21 additions & 4 deletions ocs_ci/utility/nfs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,22 @@ def create_nfs_load_balancer_service(
if "hostname" in host_details:
hostname_add = host_details["hostname"]
log.info("ingress hostname, %s", hostname_add)
return hostname_add
elif "ip" in host_details:
host_ip = host_details["ip"]
log.info("ingress host ip, %s", host_ip)
return host_ip
hostname_add = host_details["ip"]
log.info(f"ingress host ip, {hostname_add}")
else:
log.error("host details unavailable")
return None

platform = config.ENV_DATA.get("platform", "").lower()
if platform == constants.IBMCLOUD_PLATFORM:
from ocs_ci.utility.ibmcloud import (
configure_nfs_lb_security_group,
)

configure_nfs_lb_security_group()

return hostname_add


def update_etc_hosts_on_nfs_client(con, hostname):
Expand Down Expand Up @@ -305,6 +314,14 @@ def delete_nfs_load_balancer_service(
)
return

platform = config.ENV_DATA.get("platform", "").lower()
if platform == constants.IBMCLOUD_PLATFORM:
from ocs_ci.utility.ibmcloud import (
remove_nfs_lb_security_group_rules,
)

remove_nfs_lb_security_group_rules()

log.info("Deleting NFS LoadBalancer service %s", svc_name)
storage_cluster_obj.exec_oc_cmd(f"delete service {svc_name}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,28 @@ def __make_connection():
nfs_utils.update_etc_hosts_on_nfs_client(con, hostname_add)
return con

def _mount_nfs_with_retry(self, cmd, tries=28, delay=10):
    """
    Execute an NFS mount command on the client VM with retry.

    Args:
        cmd (str): Mount command to execute on the NFS client VM
        tries (int): Number of retry attempts (default: 28)
        delay (int): Delay in seconds between retries (default: 10)

    Raises:
        CommandFailed: If mount does not succeed within the retry limit
    """

    def _attempt():
        # exec_cmd returns (retcode, stdout, stderr); non-zero means the
        # mount has not come up yet, so raise to trigger another retry.
        rc, _, err = self.con.exec_cmd(cmd)
        if rc != 0:
            raise CommandFailed(
                f"NFS mount command failed with retcode {rc}: {err}"
            )

    retry(CommandFailed, tries=tries, delay=delay)(_attempt)()

@tier1
@polarion_id("OCS-4269")
@skipif_hci_client
Expand Down Expand Up @@ -573,11 +595,7 @@ def test_outcluster_nfs_export(
+ self.test_folder
)

retry(
(CommandFailed),
tries=28,
delay=10,
)(self.con.exec_cmd(export_nfs_external_cmd))
self._mount_nfs_with_retry(export_nfs_external_cmd)

# Verify able to read exported volume
command = f"cat {self.test_folder}/index.html"
Expand Down Expand Up @@ -745,11 +763,7 @@ def test_multiple_nfs_based_PVs(
+ " "
+ self.test_folder
)
retry(
(CommandFailed),
tries=28,
delay=10,
)(self.con.exec_cmd(export_nfs_external_cmd))
self._mount_nfs_with_retry(export_nfs_external_cmd)

# Verify able to access exported volume
command = f"cat {self.test_folder}/index.html"
Expand Down Expand Up @@ -878,11 +892,7 @@ def test_multiple_mounts_of_same_nfs_volume(
+ " "
+ self.test_folder
)
retry(
(CommandFailed),
tries=28,
delay=10,
)(self.con.exec_cmd(export_nfs_external_cmd))
self._mount_nfs_with_retry(export_nfs_external_cmd)

# Verify able to access exported volume
command = f"cat {self.test_folder}/shared_file.html"
Expand Down Expand Up @@ -1011,11 +1021,7 @@ def test_external_nfs_client_can_write_read_new_file(
+ " "
+ self.test_folder
)
retry(
(CommandFailed),
tries=28,
delay=10,
)(self.con.exec_cmd(export_nfs_external_cmd))
self._mount_nfs_with_retry(export_nfs_external_cmd)

# Verify able to write new file in exported volume by external client
command = (
Expand Down Expand Up @@ -1616,16 +1622,24 @@ def test_nfs_pvc_subvolume_deletion(

# Checking for stale volumes
output = exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale")
stale_before = self.parse_subvolume_ls_output(output)
log.info(f"Stale subvolumes before delete: {stale_before}")

# Deleteing stale subvolume
exec_cmd(
cmd=f"{odf_cli_path} subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}"
# Deleting stale subvolume
delete_output = exec_cmd(
cmd=f"{odf_cli_path} subvolume delete"
f" {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}"
)
log.info(f"Subvolume delete output: {delete_output.stdout}")

# Checking for stale volumes
# Verify the specific subvolume was deleted
output = exec_cmd(cmd=f"{odf_cli_path} subvolume ls --stale")
stale_volumes = self.parse_subvolume_ls_output(output)
assert len(stale_volumes) == 0 # No stale volumes available
stale_after = self.parse_subvolume_ls_output(output)
log.info(f"Stale subvolumes after delete: {stale_after}")
stale_svs = {sv[1] for sv in stale_after}
assert (
new_pvc[1] not in stale_svs
), f"Subvolume {new_pvc[1]} still stale after delete"

# Delete ocs-storagecluster-ceph-nfs-retain storageclass
self.sc_obj.delete(resource_name=self.retain_nfs_sc_name)
Expand Down Expand Up @@ -1779,11 +1793,7 @@ def test_incluster_outcluster_nfs_export_for_non_default_nfs_sc(
+ self.test_folder
)

retry(
(CommandFailed),
tries=28,
delay=10,
)(self.con.exec_cmd(export_nfs_external_cmd))
self._mount_nfs_with_retry(export_nfs_external_cmd)

# Verify able to read exported volume
command = f"cat {self.test_folder}/index.html"
Expand Down
Loading