MeteoSwiss · huppd · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/engine/check.py b/engine/check.py
@@ -7,6 +7,7 @@
 tolerances and computing divergence between DataFrames.
 """
 
+import json
 import sys
 
 import click
@@ -43,11 +44,22 @@
     default="",
     help=cli_help["fof_types"],
 )
-@click.option("--rules", default="")
+@click.option(
+    "--rules",
+    default="{}",
+    help=cli_help["rules"],
+)
 def check(
-    reference_files, current_files, tolerance_files, factor, fof_types, rules
+    reference_files,
+    current_files,
+    tolerance_files,
+    factor,
+    fof_types,
+    rules: str,
 ):  # pylint: disable=too-many-positional-arguments
 
+    parsed_rules = json.loads(rules)
+
     zipped = zip(reference_files, current_files, tolerance_files)
 
     expanded_zip = expand_fof(zipped, fof_types)
@@ -61,7 +73,7 @@ def check(
             FileInfo(reference_file),
             FileInfo(current_file),
             factor,
-            rules,
+            rules=parsed_rules,
         )
 
         if out:

diff --git a/engine/fof_compare.py b/engine/fof_compare.py
@@ -6,6 +6,7 @@
 Veri data are not considered, only reports and observations are compared.
 """
 
+import json
 import tempfile
 
 import click
@@ -42,8 +43,15 @@
     "--tolerance",
     default=1e-12,
 )
-@click.option("--rules", default="")
-def fof_compare(file1, file2, fof_types, tolerance, rules):
+@click.option(
+    "--rules",
+    default="{}",
+    type=str,
+    help=cli_help["rules"],
+)
+def fof_compare(file1, file2, fof_types, tolerance, rules: str):
+
+    parsed_rules = json.loads(rules)
 
     for fof_type in fof_types:
         file1_path = file1.format(fof_type=fof_type)
@@ -66,7 +74,7 @@ def fof_compare(file1, file2, fof_types, tolerance, rules):
                 FileInfo(file1_path),
                 FileInfo(file2_path),
                 factor=1,
-                rules=rules,
+                rules=parsed_rules,
             )
 
             if out:

diff --git a/tests/engine/test_check.py b/tests/engine/test_check.py
@@ -84,7 +84,7 @@ def test_check_cli_fof(fof_datasets):
     """
 
     df1, df2, tol_large, tol_small = fof_datasets
-    rules = {"check": [13, 18, 32], "state": [1, 5, 7, 9]}
+    rules = '{"check": [13, 18, 32], "state": [1, 5, 7, 9]}'
 
     runner = CliRunner()
     result = runner.invoke(

diff --git a/tests/engine/test_fof_compare.py b/tests/engine/test_fof_compare.py
@@ -20,7 +20,7 @@ def fixture_fof_datasets(fof_datasets_base, tmp_dir):
     """
     ds1, ds2, _, _ = fof_datasets_base
     ds3 = ds2.copy(deep=True)
-    ds3["flags"] = (("d_body",), ds3["flags"].values * 1.55)
+    ds3["flags"] = (("d_body",), ds3["flags"].values * 2)
 
     ds1_file = os.path.join(tmp_dir, "fof1_SYNOP.nc")
     ds2_file = os.path.join(tmp_dir, "fof2_SYNOP.nc")
@@ -43,7 +43,7 @@ def test_fof_compare_works(fof_datasets, tmp_dir, monkeypatch):
     df1 = df1.replace("SYNOP", "{fof_type}")
     df2 = df2.replace("SYNOP", "{fof_type}")
     monkeypatch.chdir(tmp_dir)
-    rules = ""
+    rules = "{}"
     runner = CliRunner()
 
     result = runner.invoke(
@@ -80,7 +80,7 @@ def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
     df3 = df3.replace("SYNOP", "{fof_type}")
     monkeypatch.chdir(tmp_dir)
 
-    rules = ""
+    rules = "{}"
     runner = CliRunner()
     with caplog.at_level(logging.INFO):
         runner.invoke(
@@ -102,6 +102,39 @@ def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
     assert "Files are NOT consistent!" in caplog.text
 
 
+def test_fof_compare_rules(fof_datasets, tmp_dir, monkeypatch, caplog):
+    """
+    Test that when rules for the "flags" column are passed via --rules,
+    fof-compare writes in the log file that the files are consistent.
+    """
+
+    df1, _, df3 = fof_datasets
+    df1 = df1.replace("SYNOP", "{fof_type}")
+    df3 = df3.replace("SYNOP", "{fof_type}")
+    monkeypatch.chdir(tmp_dir)
+
+    rules = '{"flags": [9, 18]}'
+    runner = CliRunner()
+    with caplog.at_level(logging.INFO):
+        runner.invoke(
+            fof_compare,
+            [
+                "--file1",
+                df1,
+                "--file2",
+                df3,
+                "--fof-types",
+                "SYNOP",
+                "--tolerance",
+                "5",
+                "--rules",
+                rules,
+            ],
+        )
+
+    assert "Files are consistent!" in caplog.text
+
+
 def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
     """
     Test that if there are no differences in the files and the tolerance is big
@@ -113,7 +146,7 @@ def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
     df2 = df2.replace("SYNOP", "{fof_type}")
     monkeypatch.chdir(tmp_dir)
 
-    rules = ""
+    rules = "{}"
     runner = CliRunner()
     with caplog.at_level(logging.INFO):
         runner.invoke(

diff --git a/util/click_util.py b/util/click_util.py
@@ -189,6 +189,10 @@ def convert(self, value, param, ctx):
     + r"than the reference before a warning gets printed.",
     "minimum_tolerance": r"Non-zero value to set variable tolerances to when the "
     + r"calculated tolerances from the ensemble are exactly zero.",
+    "rules": (
+        "JSON object specifying the rules for comparison. "
+        'Example: \'{"check":[13,18,32],"state":[1,5,7,9]}\''
+    ),
 }
 
 del dataframe_ops
diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
@@ -5,9 +5,9 @@
 reference datasets with specified tolerances.
 """
 
-import ast
 import sys
 import warnings
+from typing import Optional
 
 import numpy as np
 import pandas as pd
@@ -328,7 +328,11 @@ def parse_check(
 
 
 def check_file_with_tolerances(
-    tolerance_file_name, input_file_ref, input_file_cur, factor, rules=""
+    tolerance_file_name,
+    input_file_ref,
+    input_file_cur,
+    factor,
+    rules: Optional[dict[str, list[int]]] = None,
 ):
     """
     This function calculates the relative difference between the current file and
@@ -337,6 +341,8 @@ def check_file_with_tolerances(
     For FOF-type files, it also performs an additional check on variables with multiple
     possible values to ensure that any variations remain within the allowed range.
     """
+    if rules is None:
+        rules = {}
 
     if input_file_ref.file_type != input_file_cur.file_type:
         logger.critical(
@@ -410,17 +416,9 @@ def has_enough_data(dfs):
 }
 
 
-def parse_rules(rules):
-    if isinstance(rules, dict):
-        return rules
-
-    if isinstance(rules, str) and rules.strip():
-        return ast.literal_eval(rules)
-
-    return {}
-
-
-def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
+def compare_cells_rules(
+    ref_df, cur_df, cols, rules: dict[str, list[int]], detailed_logger
+):
     """
     This function compares two DataFrames cell by cell for a selected set of columns.
     For each row and column, it ignores values that are equal or whose differences
@@ -438,7 +436,7 @@ def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
             if val1 == val2:
                 continue
 
-            allowed = rules_dict.get(col, [])
+            allowed = rules.get(col, [])
             if val1 in allowed and val2 in allowed:
                 continue
 
@@ -454,7 +452,9 @@ def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
     return errors
 
 
-def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name):
+def check_multiple_solutions_from_dict(
+    dict_ref, dict_cur, rules: dict[str, list[int]], log_file_name
+):
     """
     This function compares two Python dictionaries, each containing DataFrames under
     the keys "reports" and "observation", row by row and column by column, according
@@ -463,7 +463,6 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
     It records the row, column and invalid values in a log file.
     """
 
-    rules_dict = parse_rules(rules)
     errors = False
     detailed_logger = initialize_detailed_logger(
         "DETAILS", log_level="DEBUG", log_file=log_file_name
@@ -473,8 +472,8 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
         cur_df = dict_cur[key]
         common_cols = [col for col in ref_df.columns if col in cur_df.columns]
 
-        cols_with_rules = [col for col in common_cols if col in rules_dict]
-        cols_without_rules = [col for col in common_cols if col not in rules_dict]
+        cols_with_rules = [col for col in common_cols if col in rules]
+        cols_without_rules = [col for col in common_cols if col not in rules]
 
         if cols_without_rules:
             t, e = compare_var_and_attr_ds(
@@ -487,7 +486,7 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
 
         if cols_with_rules:
             errors = compare_cells_rules(
-                ref_df, cur_df, cols_with_rules, rules_dict, detailed_logger
+                ref_df, cur_df, cols_with_rules, rules, detailed_logger
             )
     clean_logger_file_if_only_details(log_file_name)
     return errors