diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
deleted file mode 100644
index 777492ca..00000000
--- a/config/forecasters-co1e.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-# yaml-language-server: $schema=../workflow/tools/config.schema.json
-description: |
-  Experiment with COSMO-1E emulators finetuned on COSMO-1E analysis
-  (KENDA-1) at 1km resolution.
-
-dates:
-  start: 2020-01-01T12:00
-  end: 2020-01-10T00:00
-  frequency: 54h
-
-runs:
-  - forecaster:
-      checkpoint: https://mlflow.ecmwf.int/#/experiments/367/runs/2174c939c8844555a52843b71219d425
-      label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
-      config: resources/inference/configs/sgm-forecaster-regional_fromtraining.yaml
-      steps: 0/120/6
-      inference_resources:
-        gpu: 4
-        tasks: 4
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.6.3
-
-  - baseline:
-      label: COSMO-1E
-      root: /store_new/mch/msopr/ml/COSMO-1E
-      steps: 0/33/6
-
-truth:
-  label: COSMO KENDA
-  root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co1e-an-archive-0p01-2019-2024-1h-v1-pl13.zarr
-
-stratification:
-  regions:
-    - jura
-    - mittelland
-    - voralpen
-    - alpennordhang
-    - innerealpentaeler
-    - alpensuedseite
-  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
-
-thresholds:
-  TOT_PREC:
-    gt: [0.0, 0.001, 0.005]
-  U_10M:
-    gt: [2.5, 5.0, 10.0]
-  V_10M:
-    gt: [2.5, 5.0, 10.0]
-  T_2M:
-    lt: [273.15]
-    gt: [288.15, 298.15]
-
-dashboard:
-  stratification:
-    # - init_hour
-    # - region
-    - season
-
-locations:
-  output_root: output/
-
-profile:
-  executor: slurm
-  global_resources:
-    gpus: 16
-  default_resources:
-    slurm_partition: "postproc"
-    cpus_per_task: 1
-    mem_mb_per_cpu: 1800
-    runtime: "1h"
-    gpus: 0
-  jobs: 50
-  batch_rules:
-    plot_forecast_frame: 32
diff --git a/config/forecasters-co2-disentangled.yaml b/config/forecasters-co2-disentangled.yaml
deleted file mode 100644
index 77bdba4b..00000000
--- a/config/forecasters-co2-disentangled.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-# yaml-language-server: $schema=../workflow/tools/config.schema.json
-description: |
-  Experiment to compare entangled and disentangled forecasting. With and without autoencoder finetuning.
-
-dates:
-  - 2020-02-03T00:00 # Storm Petra
-  - 2020-02-07T00:00 # Storm Sabine
-  - 2020-10-01T00:00 # Storm Brigitte
-
-runs:
-  - forecaster:
-      checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/409/runs/2241012852624833b73b2c933db608c9
-      label: Stage C entangled
-      config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml
-      steps: 0/120/6
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-        - earthkit-utils<0.2.0
-        - earthkit-data<0.19.0
-        - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models
-
-  - forecaster:
-      checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/307/runs/f9f0b0a0c91949b6a72df2dc23c55255
-      label: dis_HAE_1lvl_ft_n320_cosmo_stage_C_direct
-      config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml
-      steps: 0/120/6
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-        - earthkit-utils<0.2.0
-        - earthkit-data<0.19.0
-        - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models
-
-  - forecaster:
-      checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/307/runs/60db882d54174b4582d10dc5826d9eee
-      label: dis_HAE_1lvl_ft_n320_cosmo_stage_C
-      config: resources/inference/configs/sgm-forecaster-global-disentangled.yaml
-      steps: 0/120/6
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-        - earthkit-utils<0.2.0
-        - earthkit-data<0.19.0
-        - git+https://github.com/MeteoSwiss/anemoi-core.git@2a90165e3f25defc55fbeb77f7b4ebfef685820d#subdirectory=models
-
-  - baseline:
-      label: COSMO-E
-      root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/120/6
-
-truth:
-  label: COSMO KENDA
-  root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr
-
-stratification:
-  regions:
-    - jura
-    - mittelland
-    - voralpen
-    - alpennordhang
-    - innerealpentaeler
-    - alpensuedseite
-  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
-
-thresholds:
-  TOT_PREC:
-    gt: [0.0, 0.001, 0.005]
-  U_10M:
-    gt: [2.5, 5.0, 10.0]
-  V_10M:
-    gt: [2.5, 5.0, 10.0]
-  T_2M:
-    lt: [273.15]
-    gt: [288.15, 298.15]
-
-dashboard:
-  stratification:
-    # - init_hour
-    # - region
-    - season
-
-locations:
-  output_root: output/
-
-profile:
-  executor: slurm
-  global_resources:
-    gpus: 16
-  default_resources:
-    slurm_partition: "postproc"
-    cpus_per_task: 1
-    mem_mb_per_cpu: 1800
-    runtime: "1h"
-    gpus: 0
-  jobs: 50
diff --git a/config/forecasters-co2.yaml b/config/forecasters-co2.yaml
deleted file mode 100644
index a51c1203..00000000
--- a/config/forecasters-co2.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-# yaml-language-server: $schema=../workflow/tools/config.schema.json
-description: |
-  Evaluate skill of COSMO-E emulator (M-1 forecaster).
-
-dates:
-  - 2020-02-03T00:00 # Storm Petra
-  - 2020-02-07T00:00 # Storm Sabine
-  - 2020-10-01T00:00 # Storm Brigitte
-
-runs:
-  - forecaster:
-      checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511
-      label: M-1 forecaster
-      steps: 0/120/6
-      config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-
-  - baseline:
-      label: COSMO-E
-      root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/120/6
-
-truth:
-  label: COSMO KENDA
-  root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-6h-v3-pl13.zarr
-
-stratification:
-  regions:
-    - jura
-    - mittelland
-    - voralpen
-    - alpennordhang
-    - innerealpentaeler
-    - alpensuedseite
-  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
-
-thresholds:
-  TOT_PREC:
-    gt: [0.0, 0.001, 0.005]
-  U_10M:
-    gt: [2.5, 5.0, 10.0]
-  V_10M:
-    gt: [2.5, 5.0, 10.0]
-  T_2M:
-    lt: [273.15]
-    gt: [288.15, 298.15]
-
-dashboard:
-  stratification:
-    # - init_hour
-    # - region
-    - season
-
-locations:
-  output_root: output/
-
-profile:
-  executor: slurm
-  global_resources:
-    gpus: 16
-  default_resources:
-    slurm_partition: "postproc"
-    cpus_per_task: 1
-    mem_mb_per_cpu: 1800
-    runtime: "1h"
-    gpus: 0
-  jobs: 50
-  batch_rules:
-    plot_forecast_frame: 32
diff --git a/config/forecasters-ich1-oper-fixed.yaml b/config/forecasters-ich1-oper-fixed.yaml
index 9c5cb970..ab3643f7 100644
--- a/config/forecasters-ich1-oper-fixed.yaml
+++ b/config/forecasters-ich1-oper-fixed.yaml
@@ -34,7 +34,7 @@ runs:
 baselines:
   - baseline:
       label: ICON-CH1-EPS
-      root: /store_new/mch/msopr/ml/ICON-CH1-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH1-EPS
       steps: 0/33/6
 
 truth:
diff --git a/config/forecasters-ich1-oper.yaml b/config/forecasters-ich1-oper.yaml
index cac91861..5316da3b 100644
--- a/config/forecasters-ich1-oper.yaml
+++ b/config/forecasters-ich1-oper.yaml
@@ -24,12 +24,12 @@ runs:
 
   - baseline:
       label: ICON-CH1-ctrl
-      root: /store_new/mch/msopr/ml/ICON-CH1-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH1-EPS
       steps: 0/33/6
 
   - baseline:
       label: ICON-CH2-ctrl
-      root: /store_new/mch/msopr/ml/ICON-CH2-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH2-EPS
       steps: 0/120/6
 
 truth:
diff --git a/config/forecasters-ich1.yaml b/config/forecasters-ich1.yaml
index 3f8ee7db..001bff0f 100644
--- a/config/forecasters-ich1.yaml
+++ b/config/forecasters-ich1.yaml
@@ -41,7 +41,7 @@ runs:
 
   - baseline:
       label: ICON-CH2-EPS
-      root: /store_new/mch/msopr/ml/ICON-CH2-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH2-EPS
       steps: 0/120/6
 
 truth:
diff --git a/config/interpolators-co2.yaml b/config/interpolators-co2.yaml
deleted file mode 100644
index 87ea6fa3..00000000
--- a/config/interpolators-co2.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-# yaml-language-server: $schema=../workflow/tools/config.schema.json
-description: |
-  Evaluate skill of SGM interpolator (M-2 interpolator).
-
-dates:
-  start: 2020-01-01T12:00
-  end: 2020-01-10T00:00
-  frequency: 60h
-  # or (for showcases)
-  # - 2020-02-03T00:00 # Storm Petra
-  # - 2020-02-07T00:00 # Storm Sabine
-  # - 2020-10-01T00:00 # Storm Brigitte
-
-runs:
-  - interpolator:
-      checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/228/runs/8d1e0410ca7d4f74b368b3079878259a
-      label: M-2 interpolator (KENDA)
-      steps: 0/120/1
-      config: resources/inference/configs/sgm-interpolator-global_trimedge_fromtest.yaml
-      forecaster: null
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-        - torch-geometric==2.6.1
-        - anemoi-graphs==0.5.2
-
-  - interpolator:
-      checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/228/runs/8d1e0410ca7d4f74b368b3079878259a
-      label: M-2 interpolator (M-1 forecaster)
-      steps: 0/120/1
-      config: resources/inference/configs/sgm-interpolator-global_trimedge.yaml
-      forecaster:
-        checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511
-        config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
-        steps: 0/120/6
-        extra_requirements:
-          - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-        - torch-geometric==2.6.1
-        - anemoi-graphs==0.5.2
-
-  - forecaster:
-      checkpoint: https://mlflow.ecmwf.int/#/experiments/103/runs/d0846032fc7248a58b089cbe8fa4c511
-      label: M-1 forecaster
-      config: resources/inference/configs/sgm-forecaster-global_trimedge.yaml
-      steps: 0/120/6
-      extra_requirements:
-        - git+https://github.com/ecmwf/anemoi-inference.git@0.8.3
-
-  - baseline:
-      label: COSMO-E
-      root: /store_new/mch/msopr/ml/COSMO-E_hourly
-      steps: 0/120/1
-
-truth:
-  label: COSMO KENDA
-  root: /scratch/mch/fzanetta/data/anemoi/datasets/mch-co2-an-archive-0p02-2015-2020-1h-v3-pl13.zarr
-
-stratification:
-  regions:
-    - jura
-    - mittelland
-    - voralpen
-    - alpennordhang
-    - innerealpentaeler
-    - alpensuedseite
-  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517
-
-thresholds:
-  TOT_PREC:
-    gt: [0.0, 0.001, 0.005]
-  U_10M:
-    gt: [2.5, 5.0, 10.0]
-  V_10M:
-    gt: [2.5, 5.0, 10.0]
-  T_2M:
-    lt: [273.15]
-    gt: [288.15, 298.15]
-
-dashboard:
-  stratification:
-    # - init_hour
-    # - region
-    - season
-
-locations:
-  output_root: output/
-
-profile:
-  executor: slurm
-  global_resources:
-    gpus: 16
-  default_resources:
-    slurm_partition: "postproc"
-    cpus_per_task: 1
-    mem_mb_per_cpu: 1800
-    runtime: "1h"
-    gpus: 0
-  jobs: 50
-  batch_rules:
-    plot_forecast_frame: 32
diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml
index a56296eb..92648b1f 100644
--- a/config/interpolators-ich1.yaml
+++ b/config/interpolators-ich1.yaml
@@ -31,14 +31,12 @@ runs:
         # pinned anemoi-datasets because of ecmwf/anemoi-utils#284, can be removed when fixed
         - anemoi-datasets==0.5.35
   - baseline:
-      baseline_id: ICON-CH2-EPS
       label: ICON-CH2-ctrl
-      root: /store_new/mch/msopr/ml/ICON-CH2-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH2-EPS
       steps: 0/120/1
   - baseline:
-      baseline_id: ICON-CH1-EPS
       label: ICON-CH1-ctrl
-      root: /store_new/mch/msopr/ml/ICON-CH1-EPS
+      root: /store_new/mch/msopr/osm/ICON-CH1-EPS
       steps: 0/33/1
 
 truth:
diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py
index 3c990b70..2c49151e 100644
--- a/src/data_input/__init__.py
+++ b/src/data_input/__init__.py
@@ -107,25 +107,64 @@ def load_analysis_data_from_zarr(
     return _select_valid_times(ds, times)
 
 
-def load_fct_data_from_grib(
-    root: Path, reftime: datetime, steps: list[int], params: list[str]
-) -> xr.Dataset:
-    """Load forecast data from GRIB files for a specific valid time."""
+def _collect_ml_grib_files(
+    root: Path, reftime: datetime, steps: list[int] | None = None
+) -> list[Path]:
+    """List the GRIB files of one ML inference run (flat directory layout).
+
+    Files in `root` matching ``{reftime:%Y%m%d%H%M}*.grib`` are returned sorted.
+    With `steps`, only names ending in ``_{step:03d}.grib`` are kept.
+    """
     files = sorted(root.glob(f"{reftime:%Y%m%d%H%M}*.grib"))
+    if steps is not None:
+        wanted = tuple(f"_{step:03d}.grib" for step in steps)
+        files = [f for f in files if f.name.endswith(wanted)]
+    return files
+
+
+def _collect_icon_archive_files(
+    root: Path, reftime: datetime, steps: list[int], member_id: str = "000"
+) -> list[Path]:
+    """Build the GRIB file paths of one member in an ICON operational archive.
+
+    `root` is the FCST<year> directory, e.g.
+    ``/store_new/mch/msopr/osm/ICON-CH1-EPS/FCST25``.
+    One path is built per lead time in `steps`; existence is not checked.
+    """
+    matches = sorted(root.glob(f"{reftime:%y%m%d%H}_*"))
+    if not matches:
+        raise ValueError(
+            f"No archive subdirectory found for {reftime:%y%m%d%H} in {root}"
+        )
+    reftime_dir = matches[-1]
+    LOG.info("Reading ICON archive from %s", reftime_dir)
+
+    prefixes = {"ICON-CH1-EPS": "i1eff", "ICON-CH2-EPS": "i2eff"}
+    gribname = next((p for m, p in prefixes.items() if m in root.parts), None)
+    if gribname is None:
+        raise ValueError(
+            f"Cannot determine model from path (expected ICON-CH1-EPS or "
+            f"ICON-CH2-EPS): {root}"
+        )
+
+    grib_dir = reftime_dir / "grib"
+    return [
+        grib_dir / f"{gribname}{lt // 24:02}{lt % 24:02}0000_{member_id}"
+        for lt in steps
+    ]
+
+
+def load_fct_data_from_grib(files: list[Path], params: list[str]) -> xr.Dataset:
+    """Load forecast data from a list of GRIB files."""
     fds = data_source.FileDataSource(datafiles=files)
-    ds = grib_decoder.load(fds, {"param": params, "step": steps})
+    ds = grib_decoder.load(fds, {"param": params})
     for var, da in ds.items():
         if "z" in da.dims and da.sizes["z"] == 1:
             ds[var] = da.squeeze("z", drop=True)
         elif "z" in da.dims and da.sizes["z"] > 1:
             ds[var] = da.rename({"z": da.attrs["vcoord_type"]})
     ds = xr.merge([ds[p].rename(p) for p in ds], compat="no_conflicts")
-    lead_times = np.array(steps, dtype="timedelta64[h]")
-    # Restrict to the requested lead times so that the TOT_PREC disaggregation
-    # below operates on the correct step interval even if the GRIB contains
-    # extra (e.g. hourly) steps beyond those requested — e.g. when consuming
-    # output from an interpolator emulator or a baseline with sub-step output.
-    ds = ds.sel(lead_time=lead_times)
+    lead_times = ds.lead_time.values
     if "TOT_PREC" in ds.data_vars:
         ## Disaggregate TOT_PREC from cumulative-from-start (expected when the
         ## accumulate_from_start_of_forecast post-processor is enabled in
@@ -171,7 +210,7 @@ def load_fct_data_from_grib(
         ds = ds.rename({"valid_time": "time"})
     if "time" not in ds.coords:
         ds = ds.assign_coords(time=ds.ref_time + ds.lead_time)
-    ds = ds.sel(ref_time=reftime)
+    ds = ds.squeeze("ref_time", drop=False)
 
     # rename 'cell' dimension to 'values' (it's earthkit-data default for flattened spatial dim)
     if "cell" in ds.dims:
@@ -179,55 +218,6 @@ def load_fct_data_from_grib(
     return ds
 
 
-def load_baseline_from_zarr(
-    root: Path, reftime: datetime, steps: list[int], params: list[str]
-) -> xr.Dataset:
-    """Load forecast data from a Zarr dataset."""
-    try:
-        baseline = xr.open_zarr(root, consolidated=True, decode_timedelta=True)
-    except ValueError:
-        raise ValueError(f"Could not open baseline zarr at {root}")
-
-    baseline = baseline.rename(
-        {"forecast_reference_time": "ref_time", "step": "lead_time"}
-    ).sortby("lead_time")
-    lead_times = np.array(steps, dtype="timedelta64[h]")
-    # Restrict to the requested lead times up-front so that the TOT_PREC
-    # disaggregation below operates on the correct step interval, and so that
-    # all other variables avoid loading unused hourly steps from the zarr.
-    baseline = baseline[params].sel(ref_time=reftime, lead_time=lead_times)
-    if "TOT_PREC" in baseline.data_vars:
-        if baseline.TOT_PREC.units == "m":
-            baseline = baseline.assign(TOT_PREC=lambda x: x.TOT_PREC * 1000)
-            baseline.TOT_PREC.attrs["units"] = "kg m-2"
-        ## Disaggregate TOT_PREC from cumulative-from-start (the expected zarr
-        ## convention for processed NWP output) to per-step accumulations.
-        ##
-        ## Sanity-check that the incoming data is actually cumulative: if
-        ## .diff() produces significantly negative values, TOT_PREC is already
-        ## period-accumulated and a second disaggregation would produce
-        ## garbage. In that case raise — we always expect cumulative-from-
-        ## start precipitation here.
-        diff = baseline.TOT_PREC.diff("lead_time")
-        min_diff = float(diff.min().compute())
-        if min_diff < -0.1:  # TOT_PREC canonical units are mm
-            raise ValueError(
-                f"TOT_PREC in the baseline zarr appears to already be "
-                f"period-accumulated (min(.diff()) = {min_diff:.3e} m)."
-            )
-        ## .diff() drops lead_time=0; .reindex() restores it as NaN (no
-        ## accumulation period exists at the forecast initial time). Clip
-        ## small float-noise negatives to zero (anything below -0.1 mm has
-        ## already been caught by the check above).
-        baseline = baseline.assign(
-            TOT_PREC=diff.clip(min=0.0).reindex(lead_time=lead_times)
-        )
-    baseline = baseline.assign_coords(time=baseline.ref_time + baseline.lead_time)
-    if "latitude" in baseline.coords and "longitude" in baseline:
-        baseline = baseline.rename({"latitude": "lat", "longitude": "lon"})
-    return baseline
-
-
 def load_obs_data_from_peakweather(
     root, reftime: datetime, steps: list[int], params: list[str], freq: str = "1h"
 ) -> xr.Dataset:
@@ -316,23 +306,18 @@ def load_truth_data(
 def load_forecast_data(
     root, reftime: datetime, steps: list[int], params: list[str]
 ) -> xr.Dataset:
-    """Load forecast data from GRIB files or a baseline Zarr dataset."""
+    """Load forecast data from ML inference GRIB output or an ICON archive.
 
+    The list of GRIB files is chosen by inspecting *root*:
+    - ``*.grib`` files directly inside *root* → ML inference output
+    - anything else → ICON operational archive
+    Either way the files are decoded by :func:`load_fct_data_from_grib`.
+    """
+    root = Path(root)
     if any(root.glob("*.grib")):
         LOG.info("Loading forecasts from GRIB files...")
-        fcst = load_fct_data_from_grib(
-            root=root,
-            reftime=reftime,
-            steps=steps,
-            params=params,
-        )
+        files = _collect_ml_grib_files(root, reftime, steps)
     else:
-        LOG.info("Loading baseline forecasts from zarr dataset...")
-        fcst = load_baseline_from_zarr(
-            root=root,
-            reftime=reftime,
-            steps=steps,
-            params=params,
-        )
-
-    return fcst
+        LOG.info("Loading baseline forecasts from ICON GRIB archive...")
+        files = _collect_icon_archive_files(root, reftime, steps)
+    return load_fct_data_from_grib(files=files, params=params)
diff --git a/tests/conftest.py b/tests/conftest.py
index 326f9199..717d56e5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,7 +8,7 @@
 
 @pytest.fixture
 def example_forecasters_config():
-    configfile = PROJECT_ROOT / "config/forecasters-co2.yaml"
+    configfile = PROJECT_ROOT / "config/forecasters-ich1.yaml"
     with open(configfile, "r") as f:
         config = yaml.safe_load(f)
     return config
@@ -16,7 +16,7 @@ def example_forecasters_config():
 
 @pytest.fixture
 def example_interpolators_config():
-    configfile = PROJECT_ROOT / "config/interpolators-co2.yaml"
+    configfile = PROJECT_ROOT / "config/interpolators-ich1.yaml"
     with open(configfile, "r") as f:
         config = yaml.safe_load(f)
     return config
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index 40281a6e..4c715931 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -1,5 +1,3 @@
-from pathlib import Path
-
 import pytest
 
 from evalml.config import ConfigModel
@@ -41,53 +39,3 @@ def test_legacy_top_level_baselines_still_supported(example_forecasters_config):
     ]
 
     _ = ConfigModel.model_validate(cfg)
-
-
-def test_workflow_parsing_excludes_baselines_from_run_configs(
-    example_forecasters_config,
-):
-    """Baseline entries in `runs` should not be treated as ML run configs."""
-
-    namespace = {
-        "Path": Path,
-        "config": example_forecasters_config,
-    }
-    common_rules = Path("workflow/rules/common.smk").read_text()
-
-    exec(common_rules, namespace)
-
-    run_configs = namespace["RUN_CONFIGS"]
-    baseline_configs = namespace["BASELINE_CONFIGS"]
-
-    assert all(
-        run_config["model_type"] != "baseline" for run_config in run_configs.values()
-    )
-    assert baseline_configs == {
-        "COSMO-E": {
-            "label": "COSMO-E",
-            "root": "/store_new/mch/msopr/ml/COSMO-E",
-            "steps": "0/120/6",
-        }
-    }
-
-
-def test_workflow_derives_baseline_id_from_root_stem(example_interpolators_config):
-    """Workflow baseline IDs should come from the baseline root path stem."""
-
-    namespace = {
-        "Path": Path,
-        "config": example_interpolators_config,
-    }
-    common_rules = Path("workflow/rules/common.smk").read_text()
-
-    exec(common_rules, namespace)
-
-    baseline_configs = namespace["BASELINE_CONFIGS"]
-
-    assert "COSMO-E_hourly" in baseline_configs
-    assert "COSMO-E-1h" not in baseline_configs
-    assert baseline_configs["COSMO-E_hourly"] == {
-        "label": "COSMO-E",
-        "root": "/store_new/mch/msopr/ml/COSMO-E_hourly",
-        "steps": "0/120/1",
-    }
diff --git a/workflow/Snakefile b/workflow/Snakefile
index a2586cb4..becffcb6 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -31,6 +31,7 @@ CONFIG_FILE = workflow.config_settings.configfiles[0]
 CONFIG_LABEL = config["config_label"] or CONFIG_FILE.stem
 EXPERIMENT_NAME = f"{WHEN}_{CONFIG_LABEL}_{CONFIG_HASH}"
 CANDIDATES = collect_all_candidates()
+BASELINES = collect_all_baselines()
 
 DATA_DIR = OUT_ROOT / "data"
 LOGS_DIR = OUT_ROOT / "logs"
@@ -83,6 +84,7 @@ onstart:
     print(_c(f"   Config:      {CONFIG_FILE.name}", "90"))
     print(_c(f"   Experiment:  {EXPERIMENT_NAME}", "90"))
     print(_c(f"   Candidates:  {", ".join(CANDIDATES)}", "90"))
+    print(_c(f"   Baselines:   {", ".join(BASELINES)}", "90"))
     print(_c(f"   Data dir:    {DATA_DIR}", "90"))
     print(_c(f"   Logs dir:    {LOGS_DIR}", "90"))
     print(_c(f"   Results dir: {RESULTS_DIR}", "90"))
diff --git a/workflow/rules/verification.smk b/workflow/rules/verification.smk
index 58b215e5..683c65f1 100644
--- a/workflow/rules/verification.smk
+++ b/workflow/rules/verification.smk
@@ -10,16 +10,19 @@ include: "common.smk"
 
 
 # TODO: make sure the boundaries aren't used
+def _get_baseline_forecast_path(wc):
+    """Return the FCST<year> directory for a baseline in the ICON GRIB archive."""
+    root = BASELINE_CONFIGS[wc.baseline_id].get("root")
+    year = wc.init_time[2:4]
+    return f"{root}/FCST{year}"
+
+
 rule verification_metrics_baseline:
     input:
         "src/verification/__init__.py",
         "src/data_input/__init__.py",
         script="workflow/scripts/verification_metrics.py",
-        baseline_zarr=lambda wc: expand(
-            "{root}/FCST{year}.zarr",
-            root=BASELINE_CONFIGS[wc.baseline_id].get("root"),
-            year=wc.init_time[2:4],
-        ),
+        forecast=_get_baseline_forecast_path,
         truth=config["truth"]["root"],
     params:
         baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"),
@@ -38,7 +41,7 @@ rule verification_metrics_baseline:
     shell:
         """
         uv run {input.script} \
-            --forecast {input.baseline_zarr} \
+            --forecast {input.forecast} \
             --truth {input.truth} \
             --reftime {wildcards.init_time} \
             --steps "{params.baseline_steps}" \