diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index c7e1e66374d..3c4bf0e8572 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -194,7 +194,6 @@ get_acm_mce_build_tag, apply_oadp_workaround, mute_mon_netsplit, - ceph_health_resolve_devicehealth, ) from ocs_ci.utility.vsphere_nodes import update_ntp_compute_nodes from ocs_ci.helpers import helpers @@ -2272,14 +2271,6 @@ def deploy_ocs(self): # https://issues.redhat.com/browse/DFBUGS-4521 if config.DEPLOYMENT.get("arbiter_deployment"): mute_mon_netsplit(namespace=self.namespace) - - # Workaround for DFBUGS-6749: devicehealth module fails when its - # pool cannot be created due to a missing default CRUSH rule. - try: - ceph_health_resolve_devicehealth() - except Exception as ex: - logger.warning(f"devicehealth workaround failed (may not be needed): {ex}") - # Verify health of ceph cluster logger.info("Done creating rook resources, waiting for HEALTH_OK") try: diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index 5dda1098400..5ca3cc916ff 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -92,7 +92,6 @@ TimeoutSampler, convert_device_size, extract_image_urls, - ceph_health_resolve_devicehealth, ) from ocs_ci.utility.decorators import switch_to_orig_index_at_last from ocs_ci.helpers.helpers import storagecluster_independent_check @@ -786,12 +785,6 @@ def ocs_install_verification( # TODO: Enable the check when a solution is identified for tools pod on FaaS consumer if not (fusion_aas_consumer or hci_cluster): - # Workaround for DFBUGS-6749: devicehealth module fails when its - # pool cannot be created due to a missing default CRUSH rule. - try: - ceph_health_resolve_devicehealth() - except Exception as ex: - log.warning(f"devicehealth workaround failed (may not be needed): {ex}") # Temporarily disable health check for hci until we have enough healthy clusters assert utils.ceph_health_check( namespace, diff --git a/ocs_ci/utility/utils.py b/ocs_ci/utility/utils.py index 45e1a2dcadb..5c6f57a6239 100644 --- a/ocs_ci/utility/utils.py +++ b/ocs_ci/utility/utils.py @@ -3128,58 +3128,9 @@ def check_ceph_health_not_ok(): sampler.wait_for_func_status(True) -def ceph_health_resolve_devicehealth(): - """ - Fix ceph health issue where the devicehealth module fails because - its pool cannot be created due to a missing CRUSH rule. - - Workaround: - 1. Set osd_pool_default_crush_rule to 0 (block pool rule) - 2. Restart the devicehealth module so it retries pool creation - 3. Archive any resulting crash reports - - """ - # importing here to avoid circular import - from ocs_ci.ocs.resources.pod import get_ceph_tools_pod - - log.warning( - "Trying to fix devicehealth module failure by setting " - "default CRUSH rule and restarting the module" - ) - ct_pod = get_ceph_tools_pod() - - ct_pod.exec_ceph_cmd( - ceph_cmd="ceph config set mon osd_pool_default_crush_rule 0", - format=None, - out_yaml_format=False, - ) - log.info("Set osd_pool_default_crush_rule to 0") - - ct_pod.exec_ceph_cmd( - ceph_cmd=("ceph mgr module force disable devicehealth --yes-i-really-mean-it"), - format=None, - out_yaml_format=False, - ) - log.info("Force disabled devicehealth module") - - ct_pod.exec_ceph_cmd( - ceph_cmd="ceph mgr module enable devicehealth", - format=None, - out_yaml_format=False, - ) - log.info("Re-enabled devicehealth module") - - # give time to generate crash - time.sleep(180) - - ceph_crash_info_display(ct_pod) - archive_ceph_crashes(ct_pod) - - def ceph_health_resolve_crash(): """ Fix ceph health issue with daemon crash - """ log.warning("Trying to fix the issue with crash by archiving crashes") from ocs_ci.ocs.resources.pod import get_ceph_tools_pod @@ -3298,20 +3249,6 @@ def ceph_health_recover( """ ceph_health_fixes = [ - { - "pattern": r"Module 'devicehealth' has failed", - "func": ceph_health_resolve_devicehealth, - "func_args": [], - "func_kwargs": {}, - "ceph_health_tries": 10, - "ceph_health_delay": 30, - "known_issues": [ - { - "issue": "DFBUGS-6749", - "pattern": r"Module 'devicehealth' has failed", - }, - ], - }, { "pattern": r"daemons have recently crashed", "func": ceph_health_resolve_crash,