diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml deleted file mode 100644 index 777492ca..00000000 --- a/config/forecasters-co1e.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Experiment with COSMO-1E emulators finetuned on COSMO-1E analysis - (KENDA-1) at 1km resolution. - -dates: - start: 2020-01-01T12:00 - end: 2020-01-10T00:00 - frequency: 54h - -runs: - - forecaster: - checkpoint: https://mlflow.ecmwf.int/#/experiments/367/runs/2174c939c8844555a52843b71219d425 - label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11 - config: resources/inference/configs/sgm-forecaster-regional_fromtraining.yaml - steps: 0/120/6 - inference_resources: - gpu: 4 - tasks: 4 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.6.3 - - - baseline: - label: COSMO-1E - root: /store_new/mch/msopr/ml/COSMO-1E - steps: 0/33/6 - -truth: - label: COSMO KENDA - root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co1e-an-archive-0p01-2019-2024-1h-v1-pl13.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -thresholds: - TOT_PREC: - gt: [0.0, 0.001, 0.005] - U_10M: - gt: [2.5, 5.0, 10.0] - V_10M: - gt: [2.5, 5.0, 10.0] - T_2M: - lt: [273.15] - gt: [288.15, 298.15] - -dashboard: - stratification: - # - init_hour - # - region - - season - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 - batch_rules: - plot_forecast_frame: 32 diff --git a/config/forecasters-co2-disentangled.yaml b/config/forecasters-co2-disentangled.yaml deleted file mode 100644 index 77bdba4b..00000000 --- a/config/forecasters-co2-disentangled.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Experiment to compare entangled and disentangled forecasting. With and without autoencoder finetuning. - -dates: - - 2020-02-03T00:00 # Storm Petra - - 2020-02-07T00:00 # Storm Sabine - - 2020-10-01T00:00 # Storm Brigitte - -runs: - - forecaster: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/409/runs/2241012852624833b73b2c933db608c9 - label: Stage C entangled - config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - earthkit-utils<0.2.0 - - earthkit-data<0.19.0 - - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models - - - forecaster: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/307/runs/f9f0b0a0c91949b6a72df2dc23c55255 - label: dis_HAE_1lvl_ft_n320_cosmo_stage_C_direct - config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - earthkit-utils<0.2.0 - - earthkit-data<0.19.0 - - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models - - - forecaster: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/307/runs/60db882d54174b4582d10dc5826d9eee - label: dis_HAE_1lvl_ft_n320_cosmo_stage_C - config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - earthkit-utils<0.2.0 - - earthkit-data<0.19.0 - - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models - - - baseline: - label: COSMO-E - root: /store_new/mch/msopr/ml/COSMO-E - steps: 0/120/6 - -truth: - label: COSMO KENDA - root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -thresholds: - TOT_PREC: - gt: [0.0, 0.001, 0.005] - U_10M: - gt: [2.5, 5.0, 10.0] - V_10M: - gt: [2.5, 5.0, 10.0] - T_2M: - lt: [273.15] - gt: [288.15, 298.15] - -dashboard: - stratification: - # - init_hour - # - region - - season - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 diff --git a/config/forecasters-co2.yaml b/config/forecasters-co2.yaml deleted file mode 100644 index a51c1203..00000000 --- a/config/forecasters-co2.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Evaluate skill of COSMO-E emulator (M-1 forecaster). - -dates: - - 2020-02-03T00:00 # Storm Petra - - 2020-02-07T00:00 # Storm Sabine - - 2020-10-01T00:00 # Storm Brigitte - -runs: - - forecaster: - checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511 - label: M-1 forecaster - steps: 0/120/6 - config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - - baseline: - label: COSMO-E - root: /store_new/mch/msopr/ml/COSMO-E - steps: 0/120/6 - -truth: - label: COSMO KENDA - root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -thresholds: - TOT_PREC: - gt: [0.0, 0.001, 0.005] - U_10M: - gt: [2.5, 5.0, 10.0] - V_10M: - gt: [2.5, 5.0, 10.0] - T_2M: - lt: [273.15] - gt: [288.15, 298.15] - -dashboard: - stratification: - # - init_hour - # - region - - season - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 - batch_rules: - plot_forecast_frame: 32 diff --git a/config/forecasters-ich1-oper-fixed.yaml b/config/forecasters-ich1-oper-fixed.yaml index 9c5cb970..ab3643f7 100644 --- a/config/forecasters-ich1-oper-fixed.yaml +++ b/config/forecasters-ich1-oper-fixed.yaml @@ -34,7 +34,7 @@ runs: baselines: - baseline: label: ICON-CH1-EPS - root: /store_new/mch/msopr/ml/ICON-CH1-EPS + root: /store_new/mch/msopr/osm/ICON-CH1-EPS steps: 0/33/6 truth: diff --git a/config/forecasters-ich1-oper.yaml b/config/forecasters-ich1-oper.yaml index cac91861..5316da3b 100644 --- a/config/forecasters-ich1-oper.yaml +++ b/config/forecasters-ich1-oper.yaml @@ -24,12 +24,12 @@ runs: - baseline: label: ICON-CH1-ctrl - root: /store_new/mch/msopr/ml/ICON-CH1-EPS + root: /store_new/mch/msopr/osm/ICON-CH1-EPS steps: 0/33/6 - baseline: label: ICON-CH2-ctrl - root: /store_new/mch/msopr/ml/ICON-CH2-EPS + root: /store_new/mch/msopr/osm/ICON-CH2-EPS steps: 0/120/6 truth: diff --git a/config/forecasters-ich1.yaml b/config/forecasters-ich1.yaml index 3f8ee7db..001bff0f 100644 --- a/config/forecasters-ich1.yaml +++ b/config/forecasters-ich1.yaml @@ -41,7 +41,7 @@ runs: - baseline: label: ICON-CH2-EPS - root: /store_new/mch/msopr/ml/ICON-CH2-EPS + root: /store_new/mch/msopr/osm/ICON-CH2-EPS steps: 0/120/6 truth: diff --git a/config/interpolators-co2.yaml b/config/interpolators-co2.yaml deleted file mode 100644 index 87ea6fa3..00000000 --- a/config/interpolators-co2.yaml +++ /dev/null @@ -1,101 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Evaluate skill of SGM interpolator (M-2 interpolator). - -dates: - start: 2020-01-01T12:00 - end: 2020-01-10T00:00 - frequency: 60h - # or (for showcases) - # - 2020-02-03T00:00 # Storm Petra - # - 2020-02-07T00:00 # Storm Sabine - # - 2020-10-01T00:00 # Storm Brigitte - -runs: - - interpolator: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/228/runs/8d1e0410ca7d4f74b368b3079878259a - label: M-2 interpolator (KENDA) - steps: 0/120/1 - config: resources/inference/configs/sgm-interpolator-global_trimedge_fromtest.yaml - forecaster: null - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - torch-geometric==2.6.1 - - anemoi-graphs==0.5.2 - - - interpolator: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/228/runs/8d1e0410ca7d4f74b368b3079878259a - label: M-2 interpolator (M-1 forecaster) - steps: 0/120/1 - config: resources/inference/configs/sgm-interpolator-global_trimedge.yaml - forecaster: - checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511 - config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - torch-geometric==2.6.1 - - anemoi-graphs==0.5.2 - - - forecaster: - checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511 - label: M-1 forecaster - config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3 - - - baseline: - label: COSMO-E - root: /store_new/mch/msopr/ml/COSMO-E_hourly - steps: 0/120/1 - -truth: - label: COSMO KENDA - root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-1h-v3-pl13.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -thresholds: - TOT_PREC: - gt: [0.0, 0.001, 0.005] - U_10M: - gt: [2.5, 5.0, 10.0] - V_10M: - gt: [2.5, 5.0, 10.0] - T_2M: - lt: [273.15] - gt: [288.15, 298.15] - -dashboard: - stratification: - # - init_hour - # - region - - season - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 - batch_rules: - plot_forecast_frame: 32 diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index a56296eb..92648b1f 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -31,14 +31,12 @@ runs: # pinned anemoi-datasets because of ecmwf/anemoi-utils#284, can be removed when fixed - anemoi-datasets==0.5.35 - baseline: - baseline_id: ICON-CH2-EPS label: ICON-CH2-ctrl - root: /store_new/mch/msopr/ml/ICON-CH2-EPS + root: /store_new/mch/msopr/osm/ICON-CH2-EPS steps: 0/120/1 - baseline: - baseline_id: ICON-CH1-EPS label: ICON-CH1-ctrl - root: /store_new/mch/msopr/ml/ICON-CH1-EPS + root: /store_new/mch/msopr/osm/ICON-CH1-EPS steps: 0/33/1 truth: diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py index 3c990b70..2c49151e 100644 --- a/src/data_input/__init__.py +++ b/src/data_input/__init__.py @@ -107,25 +107,64 @@ def load_analysis_data_from_zarr( return _select_valid_times(ds, times) -def load_fct_data_from_grib( - root: Path, reftime: datetime, steps: list[int], params: list[str] -) -> xr.Dataset: - """Load forecast data from GRIB files for a specific valid time.""" +def _collect_ml_grib_files( + root: Path, reftime: datetime, steps: list[int] | None = None +) -> list[Path]: + """Return GRIB files for an ML inference run (flat directory layout). + + When `steps` is provided, the discovered files are filtered to those whose + name ends with ``_{step:03d}.grib``. + """ files = sorted(root.glob(f"{reftime:%Y%m%d%H%M}*.grib")) + if steps is None: + return files + suffixes = {f"_{step:03d}.grib" for step in steps} + return [f for f in files if any(f.name.endswith(s) for s in suffixes)] + + +def _collect_icon_archive_files( + root: Path, reftime: datetime, steps: list[int], member_id: str = "000" +) -> list[Path]: + """Return surface GRIB files for one member of an ICON operational archive. + + `root` is the FCST directory, e.g. + ``/store_new/mch/msopr/osm/ICON-CH1-EPS/FCST25``. + """ + reftime_dirs = sorted(root.glob(f"{reftime:%y%m%d%H}_*")) + if not reftime_dirs: + raise ValueError( + f"No archive subdirectory found for {reftime:%y%m%d%H} in {root}" + ) + reftime_dir = reftime_dirs[-1] + LOG.info("Reading ICON archive from %s", reftime_dir) + + if "ICON-CH1-EPS" in root.parts: + gribname = "i1eff" + elif "ICON-CH2-EPS" in root.parts: + gribname = "i2eff" + else: + raise ValueError( + f"Cannot determine model from path (expected ICON-CH1-EPS or " + f"ICON-CH2-EPS): {root}" + ) + + return [ + reftime_dir / "grib" / f"{gribname}{lt // 24:02}{lt % 24:02}0000_{member_id}" + for lt in steps + ] + + +def load_fct_data_from_grib(files: list[Path], params: list[str]) -> xr.Dataset: + """Load forecast data from a list of GRIB files.""" fds = data_source.FileDataSource(datafiles=files) - ds = grib_decoder.load(fds, {"param": params, "step": steps}) + ds = grib_decoder.load(fds, {"param": params}) for var, da in ds.items(): if "z" in da.dims and da.sizes["z"] == 1: ds[var] = da.squeeze("z", drop=True) elif "z" in da.dims and da.sizes["z"] > 1: ds[var] = da.rename({"z": da.attrs["vcoord_type"]}) ds = xr.merge([ds[p].rename(p) for p in ds], compat="no_conflicts") - lead_times = np.array(steps, dtype="timedelta64[h]") - # Restrict to the requested lead times so that the TOT_PREC disaggregation - # below operates on the correct step interval even if the GRIB contains - # extra (e.g. hourly) steps beyond those requested — e.g. when consuming - # output from an interpolator emulator or a baseline with sub-step output. - ds = ds.sel(lead_time=lead_times) + lead_times = ds.lead_time.values if "TOT_PREC" in ds.data_vars: ## Disaggregate TOT_PREC from cumulative-from-start (expected when the ## accumulate_from_start_of_forecast post-processor is enabled in @@ -171,7 +210,7 @@ def load_fct_data_from_grib( ds = ds.rename({"valid_time": "time"}) if "time" not in ds.coords: ds = ds.assign_coords(time=ds.ref_time + ds.lead_time) - ds = ds.sel(ref_time=reftime) + ds = ds.squeeze("ref_time", drop=False) # rename 'cell' dimension to 'values' (it's earthkit-data default for flattened spatial dim) if "cell" in ds.dims: @@ -179,55 +218,6 @@ def load_fct_data_from_grib( return ds -def load_baseline_from_zarr( - root: Path, reftime: datetime, steps: list[int], params: list[str] -) -> xr.Dataset: - """Load forecast data from a Zarr dataset.""" - try: - baseline = xr.open_zarr(root, consolidated=True, decode_timedelta=True) - except ValueError: - raise ValueError(f"Could not open baseline zarr at {root}") - - baseline = baseline.rename( - {"forecast_reference_time": "ref_time", "step": "lead_time"} - ).sortby("lead_time") - lead_times = np.array(steps, dtype="timedelta64[h]") - # Restrict to the requested lead times up-front so that the TOT_PREC - # disaggregation below operates on the correct step interval, and so that - # all other variables avoid loading unused hourly steps from the zarr. - baseline = baseline[params].sel(ref_time=reftime, lead_time=lead_times) - if "TOT_PREC" in baseline.data_vars: - if baseline.TOT_PREC.units == "m": - baseline = baseline.assign(TOT_PREC=lambda x: x.TOT_PREC * 1000) - baseline.TOT_PREC.attrs["units"] = "kg m-2" - ## Disaggregate TOT_PREC from cumulative-from-start (the expected zarr - ## convention for processed NWP output) to per-step accumulations. - ## - ## Sanity-check that the incoming data is actually cumulative: if - ## .diff() produces significantly negative values, TOT_PREC is already - ## period-accumulated and a second disaggregation would produce - ## garbage. In that case raise — we always expect cumulative-from- - ## start precipitation here. - diff = baseline.TOT_PREC.diff("lead_time") - min_diff = float(diff.min().compute()) - if min_diff < -0.1: # TOT_PREC canonical units are mm - raise ValueError( - f"TOT_PREC in the baseline zarr appears to already be " - f"period-accumulated (min(.diff()) = {min_diff:.3e} m)." - ) - ## .diff() drops lead_time=0; .reindex() restores it as NaN (no - ## accumulation period exists at the forecast initial time). Clip - ## small float-noise negatives to zero (anything below -0.1 mm has - ## already been caught by the check above). - baseline = baseline.assign( - TOT_PREC=diff.clip(min=0.0).reindex(lead_time=lead_times) - ) - baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time) - if "latitude" in baseline.coords and "longitude" in baseline: - baseline = baseline.rename({"latitude": "lat", "longitude": "lon"}) - return baseline - - def load_obs_data_from_peakweather( root, reftime: datetime, steps: list[int], params: list[str], freq: str = "1h" ) -> xr.Dataset: @@ -316,23 +306,22 @@ def load_truth_data( def load_forecast_data( root, reftime: datetime, steps: list[int], params: list[str] ) -> xr.Dataset: - """Load forecast data from GRIB files or a baseline Zarr dataset.""" + """Load forecast data from GRIB files or an ICON archive. + Routing (in order): + 1. ``*.grib`` files present in *root* → :func:`load_fct_data_from_grib` + (ML inference output) + 2. Otherwise → ICON operational archive + """ + root = Path(root) if any(root.glob("*.grib")): LOG.info("Loading forecasts from GRIB files...") - fcst = load_fct_data_from_grib( - root=root, - reftime=reftime, - steps=steps, - params=params, - ) - else: - LOG.info("Loading baseline forecasts from zarr dataset...") - fcst = load_baseline_from_zarr( - root=root, - reftime=reftime, - steps=steps, + return load_fct_data_from_grib( + files=_collect_ml_grib_files(root, reftime, steps), params=params, ) - - return fcst + LOG.info("Loading baseline forecasts from ICON GRIB archive...") + return load_fct_data_from_grib( + files=_collect_icon_archive_files(root, reftime, steps), + params=params, + ) diff --git a/tests/conftest.py b/tests/conftest.py index 326f9199..717d56e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,7 @@ @pytest.fixture def example_forecasters_config(): - configfile = PROJECT_ROOT / "config/forecasters-co2.yaml" + configfile = PROJECT_ROOT / "config/forecasters-ich1.yaml" with open(configfile, "r") as f: config = yaml.safe_load(f) return config @@ -16,7 +16,7 @@ def example_forecasters_config(): @pytest.fixture def example_interpolators_config(): - configfile = PROJECT_ROOT / "config/interpolators-co2.yaml" + configfile = PROJECT_ROOT / "config/interpolators-ich1.yaml" with open(configfile, "r") as f: config = yaml.safe_load(f) return config diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 40281a6e..4c715931 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pytest from evalml.config import ConfigModel @@ -41,53 +39,3 @@ def test_legacy_top_level_baselines_still_supported(example_forecasters_config): ] _ = ConfigModel.model_validate(cfg) - - -def test_workflow_parsing_excludes_baselines_from_run_configs( - example_forecasters_config, -): - """Baseline entries in `runs` should not be treated as ML run configs.""" - - namespace = { - "Path": Path, - "config": example_forecasters_config, - } - common_rules = Path("workflow/rules/common.smk").read_text() - - exec(common_rules, namespace) - - run_configs = namespace["RUN_CONFIGS"] - baseline_configs = namespace["BASELINE_CONFIGS"] - - assert all( - run_config["model_type"] != "baseline" for run_config in run_configs.values() - ) - assert baseline_configs == { - "COSMO-E": { - "label": "COSMO-E", - "root": "/store_new/mch/msopr/ml/COSMO-E", - "steps": "0/120/6", - } - } - - -def test_workflow_derives_baseline_id_from_root_stem(example_interpolators_config): - """Workflow baseline IDs should come from the baseline root path stem.""" - - namespace = { - "Path": Path, - "config": example_interpolators_config, - } - common_rules = Path("workflow/rules/common.smk").read_text() - - exec(common_rules, namespace) - - baseline_configs = namespace["BASELINE_CONFIGS"] - - assert "COSMO-E_hourly" in baseline_configs - assert "COSMO-E-1h" not in baseline_configs - assert baseline_configs["COSMO-E_hourly"] == { - "label": "COSMO-E", - "root": "/store_new/mch/msopr/ml/COSMO-E_hourly", - "steps": "0/120/1", - } diff --git a/workflow/Snakefile b/workflow/Snakefile index a2586cb4..becffcb6 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -31,6 +31,7 @@ CONFIG_FILE = workflow.config_settings.configfiles[0] CONFIG_LABEL = config["config_label"] or CONFIG_FILE.stem EXPERIMENT_NAME = f"{WHEN}_{CONFIG_LABEL}_{CONFIG_HASH}" CANDIDATES = collect_all_candidates() +BASELINES = collect_all_baselines() DATA_DIR = OUT_ROOT / "data" LOGS_DIR = OUT_ROOT / "logs" @@ -83,6 +84,7 @@ onstart: print(_c(f" Config: {CONFIG_FILE.name}", "90")) print(_c(f" Experiment: {EXPERIMENT_NAME}", "90")) print(_c(f" Candidates: {", ".join(CANDIDATES)}", "90")) + print(_c(f" Baselines: {", ".join(BASELINES)}", "90")) print(_c(f" Data dir: {DATA_DIR}", "90")) print(_c(f" Logs dir: {LOGS_DIR}", "90")) print(_c(f" Results dir: {RESULTS_DIR}", "90")) diff --git a/workflow/rules/verification.smk b/workflow/rules/verification.smk index 58b215e5..683c65f1 100644 --- a/workflow/rules/verification.smk +++ b/workflow/rules/verification.smk @@ -10,16 +10,19 @@ include: "common.smk" # TODO: make sure the boundaries aren't used +def _get_baseline_forecast_path(wc): + """Return the FCST directory for a baseline in the ICON GRIB archive.""" + root = BASELINE_CONFIGS[wc.baseline_id].get("root") + year = wc.init_time[2:4] + return f"{root}/FCST{year}" + + rule verification_metrics_baseline: input: "src/verification/__init__.py", "src/data_input/__init__.py", script="workflow/scripts/verification_metrics.py", - baseline_zarr=lambda wc: expand( - "{root}/FCST{year}.zarr", - root=BASELINE_CONFIGS[wc.baseline_id].get("root"), - year=wc.init_time[2:4], - ), + forecast=_get_baseline_forecast_path, truth=config["truth"]["root"], params: baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"), @@ -38,7 +41,7 @@ rule verification_metrics_baseline: shell: """ uv run {input.script} \ - --forecast {input.baseline_zarr} \ + --forecast {input.forecast} \ --truth {input.truth} \ --reftime {wildcards.init_time} \ --steps "{params.baseline_steps}" \