Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions engine/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
tolerances and computing divergence between DataFrames.
"""

import json
import sys

import click
Expand Down Expand Up @@ -43,11 +44,22 @@
default="",
help=cli_help["fof_types"],
)
@click.option("--rules", default="")
@click.option(
"--rules",
default="{}",
help=cli_help["rules"],
)
def check(
reference_files, current_files, tolerance_files, factor, fof_types, rules
reference_files,
current_files,
tolerance_files,
factor,
fof_types,
rules: str,
): # pylint: disable=too-many-positional-arguments

parsed_rules = json.loads(rules)

zipped = zip(reference_files, current_files, tolerance_files)

expanded_zip = expand_fof(zipped, fof_types)
Expand All @@ -61,7 +73,7 @@ def check(
FileInfo(reference_file),
FileInfo(current_file),
factor,
rules,
rules=parsed_rules,
)

if out:
Expand Down
14 changes: 11 additions & 3 deletions engine/fof_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Veri data are not considered, only reports and observations are compared.
"""

import json
import tempfile

import click
Expand Down Expand Up @@ -42,8 +43,15 @@
"--tolerance",
default=1e-12,
)
@click.option("--rules", default="")
def fof_compare(file1, file2, fof_types, tolerance, rules):
@click.option(
"--rules",
default="{}",
type=str,
help=cli_help["rules"],
)
def fof_compare(file1, file2, fof_types, tolerance, rules: str):

parsed_rules = json.loads(rules)

for fof_type in fof_types:
file1_path = file1.format(fof_type=fof_type)
Expand All @@ -66,7 +74,7 @@ def fof_compare(file1, file2, fof_types, tolerance, rules):
FileInfo(file1_path),
FileInfo(file2_path),
factor=1,
rules=rules,
rules=parsed_rules,
)

if out:
Expand Down
2 changes: 1 addition & 1 deletion tests/engine/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_check_cli_fof(fof_datasets):
"""

df1, df2, tol_large, tol_small = fof_datasets
rules = {"check": [13, 18, 32], "state": [1, 5, 7, 9]}
rules = '{"check": [13, 18, 32], "state": [1, 5, 7, 9]}'

runner = CliRunner()
result = runner.invoke(
Expand Down
41 changes: 37 additions & 4 deletions tests/engine/test_fof_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def fixture_fof_datasets(fof_datasets_base, tmp_dir):
"""
ds1, ds2, _, _ = fof_datasets_base
ds3 = ds2.copy(deep=True)
ds3["flags"] = (("d_body",), ds3["flags"].values * 1.55)
ds3["flags"] = (("d_body",), ds3["flags"].values * 2)

ds1_file = os.path.join(tmp_dir, "fof1_SYNOP.nc")
ds2_file = os.path.join(tmp_dir, "fof2_SYNOP.nc")
Expand All @@ -43,7 +43,7 @@ def test_fof_compare_works(fof_datasets, tmp_dir, monkeypatch):
df1 = df1.replace("SYNOP", "{fof_type}")
df2 = df2.replace("SYNOP", "{fof_type}")
monkeypatch.chdir(tmp_dir)
rules = ""
rules = "{}"
runner = CliRunner()

result = runner.invoke(
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
df3 = df3.replace("SYNOP", "{fof_type}")
monkeypatch.chdir(tmp_dir)

rules = ""
rules = "{}"
runner = CliRunner()
with caplog.at_level(logging.INFO):
runner.invoke(
Expand All @@ -102,6 +102,39 @@ def test_fof_compare_not_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
assert "Files are NOT consistent!" in caplog.text


def test_fof_compare_rules(fof_datasets, tmp_dir, monkeypatch, caplog):
"""
Test that if the files differ in a column for which a rule is passed via
--rules, then fof-compare writes in the log that the files are consistent.
"""

df1, _, df3 = fof_datasets
df1 = df1.replace("SYNOP", "{fof_type}")
df3 = df3.replace("SYNOP", "{fof_type}")
monkeypatch.chdir(tmp_dir)

rules = '{"flags": [9, 18]}'
runner = CliRunner()
with caplog.at_level(logging.INFO):
runner.invoke(
fof_compare,
[
"--file1",
df1,
"--file2",
df3,
"--fof-types",
"SYNOP",
"--tolerance",
"5",
"--rules",
rules,
],
)

assert "Files are consistent!" in caplog.text


def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
"""
Test that if there are no differences in the files and the tolerance is big
Expand All @@ -113,7 +146,7 @@ def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
df2 = df2.replace("SYNOP", "{fof_type}")
monkeypatch.chdir(tmp_dir)

rules = ""
rules = "{}"
runner = CliRunner()
with caplog.at_level(logging.INFO):
runner.invoke(
Expand Down
4 changes: 4 additions & 0 deletions util/click_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ def convert(self, value, param, ctx):
+ r"than the reference before a warning gets printed.",
"minimum_tolerance": r"Non-zero value to set variable tolerances to when the "
+ r"calculated tolerances from the ensemble are exactly zero.",
"rules": (
"JSON object specifying the rules for comparison. "
'Example: \'{"check":[13,18,32],"state":[1,5,7,9]}\''
),
}

del dataframe_ops
37 changes: 18 additions & 19 deletions util/dataframe_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
reference datasets with specified tolerances.
"""

import ast
import sys
import warnings
from typing import Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -328,7 +328,11 @@ def parse_check(


def check_file_with_tolerances(
tolerance_file_name, input_file_ref, input_file_cur, factor, rules=""
tolerance_file_name,
input_file_ref,
input_file_cur,
factor,
rules: Optional[dict[str, list[int]]] = None,
):
"""
This function calculates the relative difference between the current file and
Expand All @@ -337,6 +341,8 @@ def check_file_with_tolerances(
For FOF-type files, it also performs an additional check on variables with multiple
possible values to ensure that any variations remain within the allowed range.
"""
if rules is None:
rules = {}

if input_file_ref.file_type != input_file_cur.file_type:
logger.critical(
Expand Down Expand Up @@ -410,17 +416,9 @@ def has_enough_data(dfs):
}


def parse_rules(rules):
if isinstance(rules, dict):
return rules

if isinstance(rules, str) and rules.strip():
return ast.literal_eval(rules)

return {}


def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
def compare_cells_rules(
ref_df, cur_df, cols, rules: dict[str, list[int]], detailed_logger
):
"""
This function compares two DataFrames cell by cell for a selected set of columns.
For each row and column, it ignores values that are equal or whose differences
Expand All @@ -438,7 +436,7 @@ def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
if val1 == val2:
continue

allowed = rules_dict.get(col, [])
allowed = rules.get(col, [])
if val1 in allowed and val2 in allowed:
continue

Expand All @@ -454,7 +452,9 @@ def compare_cells_rules(ref_df, cur_df, cols, rules_dict, detailed_logger):
return errors


def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name):
def check_multiple_solutions_from_dict(
dict_ref, dict_cur, rules: dict[str, list[int]], log_file_name
):
"""
This function compares two Python dictionaries, each containing DataFrames under
the keys "reports" and "observation", row by row and column by column, according
Expand All @@ -463,7 +463,6 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
It records the row, column and invalid values in a log file.
"""

rules_dict = parse_rules(rules)
errors = False
detailed_logger = initialize_detailed_logger(
"DETAILS", log_level="DEBUG", log_file=log_file_name
Expand All @@ -473,8 +472,8 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
cur_df = dict_cur[key]
common_cols = [col for col in ref_df.columns if col in cur_df.columns]

cols_with_rules = [col for col in common_cols if col in rules_dict]
cols_without_rules = [col for col in common_cols if col not in rules_dict]
cols_with_rules = [col for col in common_cols if col in rules]
cols_without_rules = [col for col in common_cols if col not in rules]

if cols_without_rules:
t, e = compare_var_and_attr_ds(
Expand All @@ -487,7 +486,7 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)

if cols_with_rules:
errors = compare_cells_rules(
ref_df, cur_df, cols_with_rules, rules_dict, detailed_logger
ref_df, cur_df, cols_with_rules, rules, detailed_logger
)
clean_logger_file_if_only_details(log_file_name)
return errors
Loading