diff --git a/ocs_ci/helpers/tlsprofile_helper.py b/ocs_ci/helpers/tlsprofile_helper.py new file mode 100644 index 000000000000..fd131babc120 --- /dev/null +++ b/ocs_ci/helpers/tlsprofile_helper.py @@ -0,0 +1,1059 @@ +""" +Helper for TLSProfile custom resources (ocs.openshift.io/v1) and in-cluster TLS +scanning via :func:`scan_cluster`. The scan logic lives in +``scripts/bash/tls_scan_endpoints.sh`` (loaded at runtime). + +References (DF 4.22+): ``TLSProfile`` centralizes TLS version, ciphers, and groups +for NooBaa and RGW; CR name ``ocs-tls-profile`` in the operator namespace; +``ocs-tls-profiles`` is an OLM dependency (include in disconnected mirroring). +Cipher/group sets follow the product-supported lists (Mozilla Intermediate/Modern +plus PQC groups). On FIPS-enabled clusters, PQ hybrids and ChaCha are not +FIPS 140-2 approved; use the ``skipif_fips_enabled`` pytest mark on tests that +rely on those algorithms. +""" + +import csv +import io +import json +import logging +import os +import re +import uuid + +from ocs_ci.framework import config +from ocs_ci.ocs import constants, defaults +from ocs_ci.ocs.exceptions import CommandFailed, TimeoutExpiredError +from ocs_ci.ocs.ocp import OCP +from ocs_ci.ocs.resources.ocs import OCS +from ocs_ci.utility.utils import TimeoutSampler, exec_cmd + +log = logging.getLogger(__name__) + +# Centralized TLSProfile cipher/group sets — API-compatible with tlsprofiles.ocs.openshift.io +# (DF 4.22 supported config; verify enums via `oc get crd tlsprofiles.ocs.openshift.io -oyaml`). 
+TLS_PROFILE_V13_CIPHERS = [ + "TLS_AES_128_GCM_SHA256", + "TLS_AES_256_GCM_SHA384", + "TLS_CHACHA20_POLY1305_SHA256", +] +TLS_PROFILE_V13_GROUPS = [ + "secp256r1", + "secp384r1", + "secp521r1", + "X25519", + "X25519MLKEM768", + "SecP256r1MLKEM768", + "SecP384r1MLKEM1024", +] +TLS_PROFILE_V12_CIPHERS = [ + "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", + "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", + "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", + "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", + "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", + "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", +] +TLS_PROFILE_V12_GROUPS = [ + "secp256r1", + "secp384r1", + "secp521r1", + "X25519", +] + +# Selector strings for rook Object Gateway TLS (domain form domain or domain/server). +# DF docs list ``noobaa.io``, ``rook.io``, and ``*``; RGW reconciliation uses the +# ceph object store gateway domain ``ceph.rook.io`` in practice. +TLS_PROFILE_SELECTOR_NOOBAA_DOMAIN = "noobaa.io" +TLS_PROFILE_SELECTOR_RGW_DOMAIN = "ceph.rook.io" + +# Heuristic: log lines that likely indicate TLS/handshake/cert/TLSProfile handling failures. +# Use re.IGNORECASE: inline (?i) after "|" is invalid in Python 3.11+. +# Avoid bare "handshake" — it matches WebSocket "handshake request" (not TLS protocol errors). +_TLS_LOG_ERROR_RE = re.compile( + r".*(\berror\b|\bfatal\b|\bpanic\b).*(" + r"\btls\b|\bssl\b|x509|\bcipher\b|TLSProfile|certificate|" + r"handshake failure|\btls\s+handshake\b|\bssl\s+handshake\b" + r")" + r"|.*(\btls\b|TLSProfile|x509|handshake failure).*(\berror\b|\bfatal\b|failed|failure|invalid|reject)", + re.IGNORECASE, +) + +# Transient / non-TLS errors that sometimes mention "tls" or "handshake" in unrelated contexts. 
+_TLS_LOG_EXCLUDE_RE = re.compile( + r"(?i)connection refused|dial tcp|websocket dial|handshake request|" + r"RPC:\s*Reconnect|reconnect\s*-\s*got error|context deadline exceeded|" + r"i/o timeout|no route to host|broken pipe|\bEOF\b|temporary failure", +) + + +def gather_tls_relevant_pod_names(namespace, component): + """ + Pod names to scan for TLS-related log errors based on test parametrization. + + Always includes ocs-operator and rook-ceph-operator; adds NooBaa / RGW pods when + those paths are under test. + """ + from ocs_ci.ocs.resources.pod import get_pods_having_label + + selectors = [ + constants.OCS_OPERATOR_LABEL, + constants.OPERATOR_LABEL, + ] + if component in ("noobaa", "all"): + selectors.extend( + [ + constants.NOOBAA_OPERATOR_POD_LABEL, + constants.NOOBAA_CORE_POD_LABEL, + ] + ) + if component in ("rgw", "all"): + selectors.append(constants.RGW_APP_LABEL) + + names = set() + for label in selectors: + items = get_pods_having_label(label, namespace) or [] + for item in items: + name = item.get("metadata", {}).get("name") + if name: + names.add(name) + return sorted(names) + + +def scan_pod_logs_for_tls_errors( + pod_name, + namespace, + since="30m", + tail=800, +): + """ + Return log lines that match TLS-related error heuristics for a single pod. + """ + from ocs_ci.ocs.resources.pod import get_pod_logs + + try: + raw = get_pod_logs( + pod_name=pod_name, + namespace=namespace, + since=since, + tail=str(tail), + ) + except Exception as exc: + log.warning("Could not read logs for pod %s: %s", pod_name, exc) + return [] + + bad = [] + for line in raw.splitlines(): + if not line.strip(): + continue + if ( + _TLS_LOG_ERROR_RE.search(line) + and not _TLS_LOG_EXCLUDE_RE.search(line) + and not re.search(r"(?i)no error|error.?0|errors:? 
?0", line) + ): + bad.append(line) + return bad + + +def assert_no_tls_errors_in_relevant_pod_logs( + namespace, + component, + since="45m", + tail=800, + max_lines_per_pod=30, +): + """ + Fail the test if recent operator / workload logs contain likely TLS error lines. + + Args: + namespace (str): Storage namespace (e.g. openshift-storage). + component (str): Test parametrization key: ``all``, ``noobaa``, or ``rgw``. + since (str): Passed to ``oc logs --since`` (recent window for this run). + tail (str|int): Max tail lines per pod. + max_lines_per_pod (int): Cap lines included in failure output. + """ + findings = {} + for pod_name in gather_tls_relevant_pod_names(namespace, component): + hits = scan_pod_logs_for_tls_errors(pod_name, namespace, since=since, tail=tail) + if hits: + findings[pod_name] = hits[:max_lines_per_pod] + + if findings: + blocks = [] + for pname, lines in findings.items(): + blocks.append(pname + ":\n" + "\n".join(f" {ln}" for ln in lines)) + raise AssertionError( + "TLS-related errors found in pod logs (heuristic grep):\n" + + "\n".join(blocks) + ) + + +# --- In-cluster TLS scanner (openssl s_client probes on pod IPs) ------------- + +SCAN_CLUSTER_DEFAULT_TIMEOUT = 5 +SCAN_CLUSTER_DEFAULT_SKIP_PORTS = "22,53" +SCAN_CLUSTER_DEFAULT_TLS_VERSIONS = "tls1.2,tls1.3" + +SCAN_CLUSTER_DEFAULT_TLS12_CIPHERS = ( + "ECDHE-ECDSA-AES128-GCM-SHA256," + "ECDHE-ECDSA-AES256-GCM-SHA384," + "ECDHE-ECDSA-CHACHA20-POLY1305," + "ECDHE-RSA-AES128-GCM-SHA256," + "ECDHE-RSA-AES256-GCM-SHA384," + "ECDHE-RSA-CHACHA20-POLY1305" +) + +SCAN_CLUSTER_DEFAULT_TLS12_GROUPS = "prime256v1,secp384r1,secp521r1,X25519" + +SCAN_CLUSTER_DEFAULT_TLS13_CIPHERS = ( + "TLS_AES_128_GCM_SHA256," "TLS_AES_256_GCM_SHA384," "TLS_CHACHA20_POLY1305_SHA256" +) + +SCAN_CLUSTER_DEFAULT_TLS13_GROUPS = ( + "prime256v1,secp384r1,secp521r1,X25519," + "X25519MLKEM768,SecP256r1MLKEM768,SecP384r1MLKEM1024" +) + +TLS_SCANNER_IMAGE = 
"ghcr.io/leelavg/scantls@sha256:f9b6547c7285b28539b23d2135108b57ac8bbac0c51a82c5a274a2674a6eff70" +TLS_SCANNER_NAMESPACE = "scantls-system" +# Seconds between ``oc get pod … jsonpath={.status.phase}`` samples (scanner pod startup). +TLS_SCAN_POD_PHASE_POLL_SLEEP = 2 + +TLS_SCAN_COMPONENT_SELECTORS = { + "noobaa": {"label": "app=noobaa"}, + "rgw": {"label": "app=rook-ceph-rgw"}, + "ceph": {"label": "rook_cluster=openshift-storage"}, + "csi": {"name_filter": "csi"}, + "all": {}, +} + +TLS_SCAN_BASH_SCRIPT_PATH = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + os.pardir, + os.pardir, + "scripts", + "bash", + "tls_scan_endpoints.sh", + ) +) +_tls_scan_bash_script_cache = None + + +def _get_tls_scan_bash_script(): + """Load the in-cluster TLS probe script from ``scripts/bash/tls_scan_endpoints.sh``.""" + global _tls_scan_bash_script_cache + if _tls_scan_bash_script_cache is None: + try: + with open(TLS_SCAN_BASH_SCRIPT_PATH, encoding="utf-8") as fh: + _tls_scan_bash_script_cache = fh.read() + except OSError as exc: + raise RuntimeError( + f"TLS scan: cannot read bash script {TLS_SCAN_BASH_SCRIPT_PATH}: {exc}" + ) from exc + return _tls_scan_bash_script_cache + + +def _resolve_tls_scan_kubeconfig(kubeconfig): + """Return explicit kubeconfig path, or None to use current oc context.""" + if kubeconfig: + return kubeconfig + kc = config.RUN.get("kubeconfig") + if kc: + return kc + cluster_path = config.ENV_DATA.get("cluster_path") + if cluster_path: + loc = config.RUN.get("kubeconfig_location") or defaults.KUBECONFIG_LOCATION + return os.path.join(cluster_path, loc) + return None + + +def _tls_scan_run_oc(args, kubeconfig=None, timeout=60): + cmd = ["oc"] + list(args) + if kubeconfig: + cmd.extend(["--kubeconfig", kubeconfig]) + completed = exec_cmd(cmd, timeout=timeout) + return completed.stdout.decode() + + +def _tls_scan_discover_endpoints(kubeconfig, namespaces, component="all"): + selector = TLS_SCAN_COMPONENT_SELECTORS.get(component, {}) + label = 
selector.get("label") + name_filter = selector.get("name_filter") + + endpoints = [] + for ns in namespaces: + log.info("TLS scan: discovering %s pods in namespace %s", component, ns) + cmd = [ + "get", + "pods", + "-n", + ns, + "-o", + "json", + "--field-selector=status.phase=Running", + ] + if label: + cmd.extend(["-l", label]) + + out = _tls_scan_run_oc(cmd, kubeconfig=kubeconfig, timeout=30) + data = json.loads(out) + + for pod in data.get("items", []): + pod_name = pod["metadata"]["name"] + pod_ns = pod["metadata"]["namespace"] + pod_ip = pod["status"].get("podIP", "") + if not pod_ip: + continue + if name_filter and name_filter not in pod_name: + continue + for container in pod["spec"]["containers"]: + c_name = container["name"] + cmd_parts = container.get("command", []) + container.get("args", []) + process = "" + if cmd_parts: + process = cmd_parts[0].rsplit("/", 1)[-1][:15] + if not process: + process = ( + container.get("image", "").split("/")[-1].split(":")[0][:15] + ) + for port_info in container.get("ports", []): + port = port_info.get("containerPort") + if port: + endpoints.append( + { + "pod_namespace": pod_ns, + "pod_name": pod_name, + "pod_ip": pod_ip, + "container_name": c_name, + "port": str(port), + "process": process, + } + ) + + log.info( + "TLS scan: discovered %d endpoints for component %r in %d namespace(s)", + len(endpoints), + component, + len(namespaces), + ) + return endpoints + + +def _tls_scan_build_endpoints_file(endpoints): + lines = [] + for ep in endpoints: + lines.append( + f"{ep['pod_namespace']}|{ep['pod_name']}|{ep['pod_ip']}|" + f"{ep['container_name']}|{ep['port']}|{ep['process']}" + ) + return "\n".join(lines) + "\n" + + +def _tls_scan_setup_namespace(kubeconfig): + try: + _tls_scan_run_oc( + ["get", "namespace", TLS_SCANNER_NAMESPACE], + kubeconfig=kubeconfig, + timeout=10, + ) + log.info("TLS scan: namespace %s exists", TLS_SCANNER_NAMESPACE) + except CommandFailed: + log.info("TLS scan: creating namespace %s", 
TLS_SCANNER_NAMESPACE) + _tls_scan_run_oc( + ["create", "namespace", TLS_SCANNER_NAMESPACE], + kubeconfig=kubeconfig, + timeout=10, + ) + + +def _tls_scan_wait_for_pod_ready( + kubeconfig, + pod_name, + timeout=120, + sleep=TLS_SCAN_POD_PHASE_POLL_SLEEP, +): + def _pod_phase(): + out = _tls_scan_run_oc( + [ + "get", + "pod", + pod_name, + "-n", + TLS_SCANNER_NAMESPACE, + "-o", + "jsonpath={.status.phase}", + ], + kubeconfig=kubeconfig, + timeout=10, + ) + return out.strip() + + try: + for phase in TimeoutSampler(timeout, sleep, _pod_phase): + if phase == "Running": + return + if phase in ("Failed", "Error"): + raise CommandFailed(f"TLS scan: scanner pod failed: {phase}") + except TimeoutExpiredError: + raise CommandFailed( + f"TLS scan: scanner pod not ready after {timeout}s" + ) from None + + +def _tls_scan_run_in_pod(kubeconfig, pod_name, endpoints_data, timeout=600): + _tls_scan_run_oc( + [ + "exec", + "-n", + TLS_SCANNER_NAMESPACE, + pod_name, + "--", + "bash", + "-c", + f"cat > /tmp/endpoints.txt << 'ENDOFDATA'\n{endpoints_data}ENDOFDATA", + ], + kubeconfig=kubeconfig, + timeout=30, + ) + + _tls_scan_run_oc( + [ + "exec", + "-n", + TLS_SCANNER_NAMESPACE, + pod_name, + "--", + "bash", + "-c", + f"cat > /tmp/scan.sh << 'ENDOFSCRIPT'\n{_get_tls_scan_bash_script()}ENDOFSCRIPT", + ], + kubeconfig=kubeconfig, + timeout=30, + ) + + log.info("TLS scan: running openssl probes (may take several minutes)") + return _tls_scan_run_oc( + [ + "exec", + "-n", + TLS_SCANNER_NAMESPACE, + pod_name, + "--", + "bash", + "/tmp/scan.sh", + ], + kubeconfig=kubeconfig, + timeout=timeout, + ) + + +def _tls_scan_space_separated_to_list(value): + if not value or value == "NA": + return [] + return value.split() + + +def _tls_scan_parse_csv(csv_text): + results = [] + reader = csv.DictReader(io.StringIO(csv_text)) + for row in reader: + port_str = row.get("port", "0") + try: + port_val = int(port_str) + except (ValueError, TypeError): + port_val = 0 + + results.append( + { + 
"pod_namespace": row.get("pod_namespace", ""), + "pod_name": row.get("pod_name", ""), + "pod_ip": row.get("pod_ip", ""), + "container_name": row.get("container_name", ""), + "port": port_val, + "process": row.get("process", ""), + "status": row.get("status", ""), + "tls_versions": _tls_scan_space_separated_to_list( + row.get("tlsversions") + ), + "tls12_ciphers": _tls_scan_space_separated_to_list( + row.get("tls12ciphers") + ), + "tls12_groups": _tls_scan_space_separated_to_list( + row.get("tls12groups") + ), + "tls13_ciphers": _tls_scan_space_separated_to_list( + row.get("tls13ciphers") + ), + "tls13_groups": _tls_scan_space_separated_to_list( + row.get("tls13groups") + ), + "reason": row.get("reason", ""), + } + ) + return results + + +def _tls_scan_cleanup_pod(kubeconfig, pod_name): + log.info("TLS scan: deleting pod %s", pod_name) + try: + _tls_scan_run_oc( + [ + "delete", + "pod", + pod_name, + "-n", + TLS_SCANNER_NAMESPACE, + "--grace-period=0", + "--force", + ], + kubeconfig=kubeconfig, + timeout=30, + ) + except CommandFailed as e: + log.warning("TLS scan: cleanup failed: %s", e) + + +def scan_cluster( + component="all", + kubeconfig=None, + namespaces=None, + timeout=SCAN_CLUSTER_DEFAULT_TIMEOUT, + skip_ports=None, + tls_versions=None, + tls12_ciphers=None, + tls12_groups=None, + tls13_ciphers=None, + tls13_groups=None, + scanner_image=None, + scan_timeout=600, + cleanup=True, +): + """ + Discover pod container ports in the storage namespace(s), run a short-lived + scanner pod in ``scantls-system``, and return per-endpoint TLS probe results. + + Args: + component: ``noobaa``, ``rgw``, ``ceph``, ``csi``, or ``all``. + kubeconfig: Path to kubeconfig; defaults from RUN / ENV_DATA (see + :func:`_resolve_tls_scan_kubeconfig`). + namespaces: Namespaces to scan; default + ``cluster_namespace`` or openshift-storage. + timeout: Per-openssl-probe timeout (seconds). + skip_ports: Comma-separated ports to skip. 
+ tls_versions: Comma-separated versions to test (default tls1.2,tls1.3). + tls12_ciphers: Comma-separated OpenSSL cipher names for TLS 1.2. + tls12_groups: Comma-separated groups for TLS 1.2. + tls13_ciphers: Comma-separated ciphersuites for TLS 1.3. + tls13_groups: Comma-separated groups for TLS 1.3. + scanner_image: Scanner container image (default ``TLS_SCANNER_IMAGE``). + scan_timeout: Max seconds for the remote ``scan.sh`` run. + cleanup: Delete the scanner pod when finished. + + Returns: + list: One dict per endpoint with keys pod_namespace, pod_name, pod_ip, + container_name, port (int), process, status (OK|NO_TLS|SKIPPED), + tls_versions, tls12_ciphers, tls12_groups, tls13_ciphers, + tls13_groups, reason. + """ + if component not in TLS_SCAN_COMPONENT_SELECTORS: + raise ValueError( + f"Unknown component {component!r}; must be one of: " + f"{', '.join(TLS_SCAN_COMPONENT_SELECTORS)}" + ) + + kubeconfig = _resolve_tls_scan_kubeconfig(kubeconfig) + + if namespaces is None: + ns = ( + config.ENV_DATA.get("cluster_namespace") + or constants.OPENSHIFT_STORAGE_NAMESPACE + ) + namespaces = [ns] + + if scanner_image is None: + scanner_image = TLS_SCANNER_IMAGE + + env_vars = { + "TIMEOUT": str(timeout), + "SKIP_PORTS": skip_ports or SCAN_CLUSTER_DEFAULT_SKIP_PORTS, + "TLS_VERSIONS": tls_versions or SCAN_CLUSTER_DEFAULT_TLS_VERSIONS, + "TLS12_CIPHERS": tls12_ciphers or SCAN_CLUSTER_DEFAULT_TLS12_CIPHERS, + "TLS12_GROUPS": tls12_groups or SCAN_CLUSTER_DEFAULT_TLS12_GROUPS, + "TLS13_CIPHERS": tls13_ciphers or SCAN_CLUSTER_DEFAULT_TLS13_CIPHERS, + "TLS13_GROUPS": tls13_groups or SCAN_CLUSTER_DEFAULT_TLS13_GROUPS, + } + + endpoints = _tls_scan_discover_endpoints(kubeconfig, namespaces, component) + if not endpoints: + log.warning( + "TLS scan: no endpoints for component %r in %s", + component, + namespaces, + ) + return [] + + endpoints_data = _tls_scan_build_endpoints_file(endpoints) + _tls_scan_setup_namespace(kubeconfig) + + pod_name = 
# Maps TLSProfile ``spec.rules[].config.version`` to tokens produced by the in-cluster
# scanner (entries in ``tls_versions`` from :func:`scan_cluster`).
TLS_PROFILE_VERSION_TO_SCAN_TOKEN = {
    "TLSv1.2": "tls1.2",
    "TLSv1.3": "tls1.3",
}


def tls_profile_api_version_to_scan_token(api_version):
    """
    Return scanner ``tls_versions`` token (e.g. ``tls1.3``) for a TLSProfile version.

    Raises:
        ValueError: When ``api_version`` is not a key of
            ``TLS_PROFILE_VERSION_TO_SCAN_TOKEN``.
    """
    token = TLS_PROFILE_VERSION_TO_SCAN_TOKEN.get(api_version)
    if not token:
        raise ValueError(
            f"Unsupported TLSProfile API version {api_version!r}; "
            f"expected one of: {', '.join(TLS_PROFILE_VERSION_TO_SCAN_TOKEN)}"
        )
    return token


def assert_tls_scan_results_include_version(
    results,
    api_tls_version,
    *,
    min_matching_endpoints=1,
    context="",
):
    """
    Fail unless at least ``min_matching_endpoints`` scan rows with ``status == "OK"``
    list the TLS version matching ``api_tls_version`` (see :func:`scan_cluster`).

    Other ``OK`` rows may negotiate only other versions (e.g. TLS 1.2 on some ports
    while the profile allows 1.3); mixed ports on the same workload do not fail the
    check as long as the minimum number of matches is met.

    Rows with any other status (``NO_TLS``, ``SKIPPED``) or without a ``status``
    key are ignored.

    Args:
        results: Return value of :func:`scan_cluster`.
        api_tls_version: e.g. ``TLSv1.2`` or ``TLSv1.3``.
        min_matching_endpoints: Minimum count of ``OK`` rows that must include the
            expected version (default 1).
        context: Short string appended to failure messages for debugging.

    Raises:
        ValueError: For an unsupported ``api_tls_version``.
        AssertionError: When fewer than ``min_matching_endpoints`` rows match.

    On success with ``api_tls_version`` ``TLSv1.3``, each matching endpoint is logged at
    INFO (pod IP, port, namespace, pod, container, process, ``tls_versions``).
    """
    token = tls_profile_api_version_to_scan_token(api_tls_version)
    suffix = f" ({context})" if context else ""

    # ``.get`` keeps a row without "status" from raising KeyError; it is simply not OK.
    ok_rows = [r for r in results if r.get("status") == "OK"]
    matching = []
    others = []
    for row in ok_rows:
        bucket = matching if token in (row.get("tls_versions") or []) else others
        bucket.append(row)

    if len(matching) < min_matching_endpoints:
        # Cap the sample so the failure message stays readable.
        sample_other = [
            f"{r.get('pod_namespace')}/{r.get('pod_name')}:{r.get('port')} "
            f"tls_versions={(r.get('tls_versions') or [])!r}"
            for r in others[:15]
        ]
        other_msg = ""
        if ok_rows:
            other_msg = "\nOther OK endpoints (no %r): %s" % (
                token,
                "\n".join(sample_other) if sample_other else "none",
            )
        raise AssertionError(
            f"TLS scan: expected at least {min_matching_endpoints} OK endpoint(s) "
            f"with {api_tls_version} ({token!r}){suffix}; "
            f"found {len(matching)} matching, {len(ok_rows)} OK total "
            f"(rows in scan: {len(results)}).{other_msg}"
        )

    if api_tls_version == "TLSv1.3" and matching:
        log.info(
            "TLS scan: TLSProfile targets TLS 1.3; %d scanner row(s) negotiated tls1.3%s",
            len(matching),
            suffix,
        )
        for r in matching:
            pod_ip = r.get("pod_ip") or ""
            port = r.get("port") or ""
            endpoint = f"{pod_ip}:{port}" if pod_ip else f":{port}"
            log.info(
                "TLS 1.3 found: endpoint=%s namespace=%s pod=%s container=%s "
                "port=%s process=%s tls_versions=%s",
                endpoint,
                r.get("pod_namespace"),
                r.get("pod_name"),
                r.get("container_name"),
                r.get("port"),
                r.get("process"),
                r.get("tls_versions"),
            )
+ """ + self.name = name + self.namespace = ( + namespace + or config.ENV_DATA.get("cluster_namespace") + or (constants.OPENSHIFT_STORAGE_NAMESPACE) + ) + self._ocp = OCP( + api_version=self.API_VERSION, + kind=self.KIND, + namespace=self.namespace, + resource_name=self.name, + ) + + def create_tls_profile( + self, + selectors=None, + tls_version="TLSv1.3", + ciphers=None, + groups=None, + do_reload=True, + ): + """ + Create a TLSProfile with one rule; selectors, TLS version, ciphers, and + groups are configurable. + + Args: + selectors (list | str): Rule selectors; default is a single wildcard. + tls_version (str): spec.rules[].config.version + ciphers (list): spec.rules[].config.ciphers + groups (list): spec.rules[].config.groups + do_reload (bool): Reload OCS object after create. + + Returns: + OCS: The created TLSProfile object. + """ + if selectors is None: + selectors = ["*"] + elif isinstance(selectors, str): + selectors = [selectors] + + if ciphers is None: + ciphers = ( + list(TLS_PROFILE_V13_CIPHERS) + if tls_version == "TLSv1.3" + else list(TLS_PROFILE_V12_CIPHERS) + ) + if groups is None: + groups = ( + list(TLS_PROFILE_V13_GROUPS) + if tls_version == "TLSv1.3" + else list(TLS_PROFILE_V12_GROUPS) + ) + + tls_resource = { + "apiVersion": self.API_VERSION, + "kind": self.KIND, + "metadata": {"name": self.name, "namespace": self.namespace}, + "spec": { + "rules": [ + { + "selectors": list(selectors), + "config": { + "version": tls_version, + "ciphers": list(ciphers), + "groups": list(groups), + }, + } + ] + }, + } + ocs_obj = OCS(**tls_resource) + log.info( + f"Creating {self.KIND} {self.name} in namespace {self.namespace} " + f"(version={tls_version})" + ) + ocs_obj.create(do_reload=do_reload) + return ocs_obj + + def is_tls_profile_available(self, silent=True): + """ + Return True if the TLSProfile exists on the cluster. + + Args: + silent (bool): If True, suppress warnings on failed get attempts. 
+ """ + data = self._ocp.get( + resource_name=self.name, + dont_raise=True, + silent=silent, + retry=0, + ) + if not data: + return False + return data.get("kind") == self.KIND + + def get_tls_profile(self, out_yaml_format=True): + """ + Fetch the TLSProfile from the cluster. + + Args: + out_yaml_format (bool): Return parsed YAML dict when True. + + Returns: + dict or str: Resource from oc get. + """ + return self._ocp.get(resource_name=self.name, out_yaml_format=out_yaml_format) + + def delete_tls_profile(self, wait=True, force=False): + """ + Delete the TLSProfile from the cluster. + + Args: + wait (bool): Wait for deletion to complete. + force (bool): Force delete with grace period 0. + + Returns: + dict: Parsed oc delete output when YAML; depends on OCP.delete behavior. + """ + log.info(f"Deleting {self.KIND} {self.name} in namespace {self.namespace}") + return self._ocp.delete(resource_name=self.name, wait=wait, force=force) + + def get_rule_config(self): + """Return the first rule's config dict or None.""" + data = self.get_tls_profile() + rules = data.get("spec", {}).get("rules") or [] + if not rules: + return None + return (rules[0].get("config") or {}).copy() + + def get_config_version(self): + """Return spec.rules[0].config.version if present.""" + cfg = self.get_rule_config() + return cfg.get("version") if cfg else None + + def replace_rules(self, selectors, tls_version, ciphers, groups): + """ + Merge-patch the full rules list (single rule) on this TLSProfile. + + Args: + selectors (list): Rule selectors. + tls_version (str): e.g. TLSv1.2, TLSv1.3 + ciphers (list): Cipher suite names for that version. + groups (list): Group names for that version. 
+ """ + patch = { + "spec": { + "rules": [ + { + "selectors": list(selectors), + "config": { + "version": tls_version, + "ciphers": list(ciphers), + "groups": list(groups), + }, + } + ] + } + } + patched = self._ocp.patch( + resource_name=self.name, + params=json.dumps(patch), + format_type="merge", + ) + if not patched: + log.warning( + "oc patch for %s did not report success; validating via get", + self.name, + ) + + +def tlsprofile_crd_exists(): + """Return True if tlsprofiles.ocs.openshift.io CRD is installed.""" + crd = OCP( + api_version="apiextensions.k8s.io/v1", + kind="CustomResourceDefinition", + resource_name="tlsprofiles.ocs.openshift.io", + ) + data = crd.get(dont_raise=True, silent=True) + return bool(data and data.get("metadata", {}).get("name")) + + +def wait_for_tlsprofile_config_version( + tls_profile, expected_version, timeout=600, sleep=15 +): + """Wait until TLSProfile spec shows the given TLS version string.""" + + def _version(): + return tls_profile.get_config_version() + + TimeoutSampler(timeout, sleep, _version).wait_for_func_value(expected_version) + + +def get_noobaa_api_server_security(namespace): + """Return NooBaa spec.security.apiServerSecurity dict or None.""" + nb = OCP(kind="noobaa", namespace=namespace, resource_name="noobaa") + data = nb.get() + return data.get("spec", {}).get("security", {}).get("apiServerSecurity") + + +def wait_for_noobaa_api_server_security_absent(namespace, timeout=600, sleep=15): + """Wait until NooBaa has no apiServerSecurity (TLSProfile-based config cleared).""" + + def _cleared(): + return get_noobaa_api_server_security(namespace) is None + + TimeoutSampler(timeout, sleep, _cleared).wait_for_func_value(True) + + +def wait_for_noobaa_tls_min_version_substring( + namespace, expected_substring, timeout=600, sleep=15 +): + """ + Wait until NooBaa apiServerSecurity.tlsMinVersion contains expected_substring + (e.g. '1.2' or '1.3'). 
+ """ + + def _match(): + sec = get_noobaa_api_server_security(namespace) + if sec is None: + return False + ver = sec.get("tlsMinVersion") + return ver is not None and expected_substring in str(ver).lower() + + TimeoutSampler(timeout, sleep, _match).wait_for_func_value(True) + + +def get_first_cephobjectstore_name(namespace): + """ + Return the name of the first CephObjectStore in namespace, or None. + """ + cos = OCP( + api_version=defaults.ROOK_API_VERSION, + kind="CephObjectStore", + namespace=namespace, + ) + items = cos.get().get("items") or [] + if not items: + return None + return items[0]["metadata"]["name"] + + +def get_cephobjectstore_security(namespace, name): + """Return CephObjectStore spec.security dict or None.""" + cos = OCP( + api_version=defaults.ROOK_API_VERSION, + kind="CephObjectStore", + namespace=namespace, + resource_name=name, + ) + data = cos.get() + return data.get("spec", {}).get("security") + + +def wait_for_cephobjectstore_tls_ciphers_substring( + namespace, cos_name, cipher_substr, timeout=600, sleep=15 +): + """Wait until RGW object's spec.security.ciphers mentions cipher_substr (TLS 1.2 probe).""" + + def _match(): + sec = get_cephobjectstore_security(namespace, cos_name) + if not sec: + return False + ciphers = sec.get("ciphers") or [] + joined = " ".join(ciphers).lower() + return cipher_substr.lower() in joined + + TimeoutSampler(timeout, sleep, _match).wait_for_func_value(True) + + +def wait_for_cephobjectstore_security_cleared( + namespace, cos_name, timeout=600, sleep=15 +): + """ + Wait until spec.security carries no TLSProfile-propagated cipher/group lists + (empty or security omitted). 
+ """ + + def _cleared(): + sec = get_cephobjectstore_security(namespace, cos_name) + if sec is None: + return True + ciphers = sec.get("ciphers") or [] + groups = sec.get("tlsGroups") or [] + return len(ciphers) == 0 and len(groups) == 0 + + TimeoutSampler(timeout, sleep, _cleared).wait_for_func_value(True) diff --git a/ocs_ci/krkn_chaos/background_cluster_operations.py b/ocs_ci/krkn_chaos/background_cluster_operations.py index fca44a6bdb80..1336e7e4dbd5 100644 --- a/ocs_ci/krkn_chaos/background_cluster_operations.py +++ b/ocs_ci/krkn_chaos/background_cluster_operations.py @@ -27,13 +27,12 @@ from typing import List, Dict, Any, Optional from collections import defaultdict -from ocs_ci.ocs import constants, ocp +from ocs_ci.ocs import constants, node as node_helpers, ocp from ocs_ci.ocs.resources import pod as pod_helpers from ocs_ci.ocs.resources import pvc as pvc_helpers from ocs_ci.ocs.resources import job as job_helpers from ocs_ci.ocs.resources.ocs import OCS from ocs_ci.ocs.resources.pvc import PVC -from ocs_ci.ocs import node as node_helpers from ocs_ci.ocs.exceptions import ( UnexpectedBehaviour, CommandFailed, @@ -914,7 +913,7 @@ def _longevity_operations(self): workload_pvcs = [] for idx, workload in enumerate(self.workloads[:num_pvcs]): log.debug( - f"Checking workload {idx+1}/{num_pvcs}: " + f"Checking workload {idx + 1}/{num_pvcs}: " f"type={type(workload).__name__}, " f"has_pvc_objs={hasattr(workload, 'pvc_objs')}, " f"has_pvc_obj={hasattr(workload, 'pvc_obj')}, " diff --git a/ocs_ci/utility/utils.py b/ocs_ci/utility/utils.py index 45e1a2dcadbd..f1eb69400f98 100644 --- a/ocs_ci/utility/utils.py +++ b/ocs_ci/utility/utils.py @@ -834,15 +834,17 @@ def exec_cmd( log.info(f"Executing command: {masked_cmd}") if threading_lock and cmd[0] == "oc": threading_lock.acquire(timeout=lock_timeout) - completed_process = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - stdin=subprocess.PIPE, - timeout=timeout, - env=_env, - **kwargs, - ) 
# Pre-check: succeed when openssl s_client prints a line starting with "CONNECTED"
# for ip:port. Uses the configurable $TIMEOUT (like every other probe in this
# script) instead of a fixed 2 seconds, so slow endpoints are not reported as
# NO_TLS while the caller asked for a longer per-probe timeout.
# NOTE(review): "CONNECTED" is presumably printed once the connection is
# established, before the handshake result is known — confirm that this is the
# intended meaning of the "No TLS handshake" reason.
test_tls_handshake() {
    local ip=$1 port=$2
    local result
    result=$(echo | timeout "$TIMEOUT" openssl s_client -connect "$ip:$port" 2>/dev/null)
    echo "$result" | grep -q "^CONNECTED"
}
test_tls_handshake "$pod_ip" "$port"; then + echo "$pod_ns,$pod_name,$pod_ip,$container_name,$port,$process,NO_TLS,NA,NA,NA,NA,NA,No TLS handshake" + return + fi + + local supported_versions="" + local supported_tls12_ciphers="" + local supported_tls12_groups="" + local supported_tls13_ciphers="" + local supported_tls13_groups="" + + for version in "${TLS_VERSIONS_ARRAY[@]}"; do + if ! test_tls_version "$pod_ip" "$port" "$version"; then + continue + fi + supported_versions="${supported_versions:+$supported_versions }$version" + + if [[ "$version" == "tls1.2" ]]; then + for cipher in "${TLS12_CIPHERS_ARRAY[@]}"; do + if test_tls12_cipher "$pod_ip" "$port" "$cipher"; then + supported_tls12_ciphers="${supported_tls12_ciphers:+$supported_tls12_ciphers }$cipher" + fi + done + for group in "${TLS12_GROUPS_ARRAY[@]}"; do + if test_group "$pod_ip" "$port" "$version" "$group"; then + supported_tls12_groups="${supported_tls12_groups:+$supported_tls12_groups }$group" + fi + done + elif [[ "$version" == "tls1.3" ]]; then + for cipher in "${TLS13_CIPHERS_ARRAY[@]}"; do + if test_tls13_cipher "$pod_ip" "$port" "$cipher"; then + supported_tls13_ciphers="${supported_tls13_ciphers:+$supported_tls13_ciphers }$cipher" + fi + done + for group in "${TLS13_GROUPS_ARRAY[@]}"; do + if test_group "$pod_ip" "$port" "$version" "$group"; then + supported_tls13_groups="${supported_tls13_groups:+$supported_tls13_groups }$group" + fi + done + fi + done + + [[ -z "$supported_versions" ]] && supported_versions="NA" + [[ -z "$supported_tls12_ciphers" ]] && supported_tls12_ciphers="NA" + [[ -z "$supported_tls12_groups" ]] && supported_tls12_groups="NA" + [[ -z "$supported_tls13_ciphers" ]] && supported_tls13_ciphers="NA" + [[ -z "$supported_tls13_groups" ]] && supported_tls13_groups="NA" + + if [[ "$supported_versions" != "NA" ]]; then + echo 
"$pod_ns,$pod_name,$pod_ip,$container_name,$port,$process,OK,$supported_versions,$supported_tls12_ciphers,$supported_tls12_groups,$supported_tls13_ciphers,$supported_tls13_groups,Supports: $supported_versions" + else + echo "$pod_ns,$pod_name,$pod_ip,$container_name,$port,$process,NO_TLS,NA,NA,NA,NA,NA,No TLS version accepted" + fi +} + +echo "pod_namespace,pod_name,pod_ip,container_name,port,process,status,tlsversions,tls12ciphers,tls12groups,tls13ciphers,tls13groups,reason" + +while IFS='|' read -r pod_ns pod_name pod_ip container_name port process; do + [[ -z "$port" ]] && continue + scan_endpoint "$pod_ns" "$pod_name" "$pod_ip" "$container_name" "$port" "$process" & +done < /tmp/endpoints.txt + +wait diff --git a/tests/functional/tlsprofile/test_centralized_tlsprofile_configuration.py b/tests/functional/tlsprofile/test_centralized_tlsprofile_configuration.py new file mode 100644 index 000000000000..74b5b3a4bfc6 --- /dev/null +++ b/tests/functional/tlsprofile/test_centralized_tlsprofile_configuration.py @@ -0,0 +1,244 @@ +import logging + +import pytest + +from ocs_ci.framework import config +from ocs_ci.framework.pytest_customization.marks import ( + brown_squad, + ignore_leftover_label, + skipif_external_mode, + skipif_fips_enabled, + skipif_managed_service, + skipif_ocs_version, + tier3, +) +from ocs_ci.framework.testlib import ManageTest +from ocs_ci.ocs import constants +from ocs_ci.helpers.tlsprofile_helper import ( + TLS_PROFILE_SELECTOR_NOOBAA_DOMAIN, + TLS_PROFILE_SELECTOR_RGW_DOMAIN, + TLS_PROFILE_V12_CIPHERS, + TLS_PROFILE_V12_GROUPS, + TLS_PROFILE_V13_CIPHERS, + TLS_PROFILE_V13_GROUPS, + TLSProfile, + assert_no_tls_errors_in_relevant_pod_logs, + assert_tls_scan_results_include_version, + get_first_cephobjectstore_name, + scan_cluster, + tlsprofile_crd_exists, + wait_for_cephobjectstore_security_cleared, + wait_for_cephobjectstore_tls_ciphers_substring, + wait_for_noobaa_api_server_security_absent, + wait_for_noobaa_tls_min_version_substring, + 
wait_for_tlsprofile_config_version, +) + +log = logging.getLogger(__name__) + + +@pytest.fixture(scope="module", autouse=True) +def require_tlsprofile_crd(): + if not tlsprofile_crd_exists(): + pytest.skip( + "TLSProfile CRD tlsprofiles.ocs.openshift.io not found on this cluster" + ) + + +@brown_squad +@tier3 +@skipif_ocs_version("<4.22") +@skipif_fips_enabled +@skipif_external_mode +@skipif_managed_service +@ignore_leftover_label(constants.NOOBAA_ENDPOINT_POD_LABEL) +class TestCentralizedTLSProfileConfiguration(ManageTest): + """ + Lifecycle tests for centralized ``TLSProfile`` (DF 4.22+): TLS 1.3 / 1.2 rules, + operand checks, in-cluster TLS scan, then delete ``ocs-tls-profile``. + + Skips on FIPS (PQ / ChaCha in our cipher lists). Deletes the CR at the end—only + run where that is safe. An autouse fixture also deletes a leftover ``ocs-tls-profile`` + if the test aborts before the in-test delete (failures, timeouts). NooBaa endpoint + pods may roll; leftover ignore matches other MCG tests. 
+ """ + + @pytest.fixture(autouse=True) + def cleanup_tlsprofile(self, request): + tls = TLSProfile() + + def _cleanup(): + try: + if tls.is_tls_profile_available(silent=True): + log.info("Teardown: deleting leftover ocs-tls-profile") + tls.delete_tls_profile(wait=True, force=True) + except Exception as exc: + log.warning("Teardown: failed to delete TLSProfile: %s", exc) + + request.addfinalizer(_cleanup) + + @pytest.mark.parametrize( + argnames="component,selectors", + argvalues=[ + pytest.param( + "all", + ["*"], + id="Centralized TLSProfile: wildcard selector applies to MCG and RGW", + ), + pytest.param( + "noobaa", + [TLS_PROFILE_SELECTOR_NOOBAA_DOMAIN], + id="Centralized TLSProfile: noobaa.io selector", + ), + pytest.param( + "rgw", + [TLS_PROFILE_SELECTOR_RGW_DOMAIN], + marks=pytest.mark.skipif( + config.ENV_DATA.get("mcg_only_deployment"), + reason="No RGW/CephObjectStore in mcg-only deployments", + ), + id="Centralized TLSProfile: ceph.rook.io (RGW) selector", + ), + ], + ) + def test_tls_profile_version_lifecycle(self, component, selectors): + """ + Centralized TLSProfile: version toggle (1.3 / 1.2 / 1.3), scan, delete, log check. + + Steps: + 1. Detect whether ``ocs-tls-profile`` exists (required metadata name per product). + 2. If missing, create it with TLS 1.3 and DF-supported cipher/group sets. + 3. Confirm spec shows TLSv1.3 and (where applicable) operands follow; run an + in-cluster port scan and assert ``tls1.3`` appears on at least one ``OK`` + endpoint (other ports may use TLS 1.2 only). + 4. Patch to TLS 1.2, verify operands where applicable; scan and assert ``tls1.2`` + appears on at least one ``OK`` endpoint. + 5. Patch back to TLS 1.3; scan and assert ``tls1.3`` + appears on at least one ``OK`` endpoint. + 6. Delete the TLSProfile. + 7. Confirm operator default (NooBaa/RGW TLSProfile propagation cleared). + 8. Scan operator/workload pod logs for TLS-related error lines (heuristic). 
+ """ + namespace = config.ENV_DATA["cluster_namespace"] + mcg_only = config.ENV_DATA.get("mcg_only_deployment") + + verify_nb = component in ("noobaa", "all") + verify_rgw = component in ("rgw", "all") and not mcg_only + cos_name = get_first_cephobjectstore_name(namespace) if verify_rgw else None + if verify_rgw and cos_name is None: + if component == "rgw": + pytest.skip( + f"No CephObjectStore in {namespace}; RGW parametrization requires an " + "object store (check namespace, ODF build, or that RGW is deployed)." + ) + log.warning( + "No CephObjectStore in %s; skipping RGW-side assertions for selector-all", + namespace, + ) + verify_rgw = False + + tls = TLSProfile() + assert ( + tls.name == "ocs-tls-profile" + ), "TLSProfile metadata.name must be ocs-tls-profile" + + if not tls.is_tls_profile_available(): + log.info("TLSProfile absent; creating with TLSv1.3") + tls.create_tls_profile( + selectors=selectors, + tls_version="TLSv1.3", + ciphers=TLS_PROFILE_V13_CIPHERS, + groups=TLS_PROFILE_V13_GROUPS, + ) + else: + log.info("TLSProfile exists; normalizing rule to TLSv1.3 before checks") + tls.replace_rules( + selectors, + "TLSv1.3", + TLS_PROFILE_V13_CIPHERS, + TLS_PROFILE_V13_GROUPS, + ) + + wait_for_tlsprofile_config_version(tls, "TLSv1.3") + assert tls.get_config_version() == "TLSv1.3" + + if verify_nb: + wait_for_noobaa_tls_min_version_substring(namespace, "1.3") + + log.info( + "In-cluster TLS scan: expect tls1.3 on ports that speak TLS (param %r)", + component, + ) + scan_after_v13 = scan_cluster(component=component, namespaces=[namespace]) + assert_tls_scan_results_include_version( + scan_after_v13, + "TLSv1.3", + context=f"TLSProfile TLSv1.3, component={component}", + ) + + log.info("Patch TLSProfile to TLSv1.2 and validate") + tls.replace_rules( + selectors, + "TLSv1.2", + TLS_PROFILE_V12_CIPHERS, + TLS_PROFILE_V12_GROUPS, + ) + wait_for_tlsprofile_config_version(tls, "TLSv1.2") + assert tls.get_config_version() == "TLSv1.2" + + if verify_nb: + 
wait_for_noobaa_tls_min_version_substring(namespace, "1.2") + + if verify_rgw: + wait_for_cephobjectstore_tls_ciphers_substring( + namespace, cos_name, "ECDHE", timeout=600, sleep=15 + ) + + log.info( + "In-cluster TLS scan: expect tls1.2 on ports that speak TLS (param %r)", + component, + ) + scan_after_v12 = scan_cluster(component=component, namespaces=[namespace]) + assert_tls_scan_results_include_version( + scan_after_v12, + "TLSv1.2", + context=f"TLSProfile TLSv1.2, component={component}", + ) + + log.info("Restore TLSv1.3 on TLSProfile, then delete resource") + tls.replace_rules( + selectors, + "TLSv1.3", + TLS_PROFILE_V13_CIPHERS, + TLS_PROFILE_V13_GROUPS, + ) + wait_for_tlsprofile_config_version(tls, "TLSv1.3") + + log.info( + "In-cluster TLS scan: expect tls1.3 after restore (param %r)", + component, + ) + scan_after_restore = scan_cluster(component=component, namespaces=[namespace]) + assert_tls_scan_results_include_version( + scan_after_restore, + "TLSv1.3", + context=f"TLSProfile restored to TLSv1.3, component={component}", + ) + + tls.delete_tls_profile(wait=True, force=False) + assert ( + not tls.is_tls_profile_available() + ), "TLSProfile should be absent after delete" + + if verify_nb: + wait_for_noobaa_api_server_security_absent(namespace) + + if verify_rgw: + wait_for_cephobjectstore_security_cleared( + namespace, cos_name, timeout=600, sleep=15 + ) + + log.info( + "Scanning operator/workload pod logs for TLS-related errors (last ~45m)" + ) + assert_no_tls_errors_in_relevant_pod_logs(namespace, component, since="45m")