Skip to content
Draft
117 changes: 117 additions & 0 deletions config/multipanel_example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# yaml-language-server: $schema=../workflow/tools/config.schema.json
description: |
  Evaluate skill of Stage E with/without cutoff edges trained with and without subgrid orography.

# Timestamps are quoted so every YAML loader keeps them as plain strings.
dates:
  start: "2025-01-01T06:00"
  end: "2025-12-26T00:00"
  frequency: 30h

# Model runs to evaluate. Each entry pins its own checkpoint, inference
# config and anemoi-inference revision.
runs:
  - forecaster:
      inference_resources:
        slurm_partition: normal-shared
      checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95
      label: stage_E_realch1
      steps: 0/120/6
      config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml
      extra_requirements:
        - git+https://github.com/ecmwf/anemoi-inference.git@0.10.0

  - forecaster:
      inference_resources:
        slurm_partition: normal-shared
      checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/c30490b6ba064e4db03b430f3a2595ad
      # Label spelling fixed: "orography", matching the description above.
      label: stage_E_icon_1km_cutoff_edges_subgrid_orography
      steps: 0/120/6
      config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml
      extra_requirements:
        - git+https://github.com/ecmwf/anemoi-inference.git@b9aaee5df86614cad9d8d08b76876a4be4e980db

  # Disabled runs, kept for reference. Uncomment a whole entry to enable it.
  # - forecaster:
  #     inference_resources:
  #       slurm_partition: normal-shared
  #     checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/57684b20f64f414b937cce10e5ceeb68
  #     label: stage_E_realch1_new
  #     steps: 0/120/6
  #     config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml
  #     extra_requirements:
  #       - git+https://github.com/ecmwf/anemoi-inference.git@b9aaee5df86614cad9d8d08b76876a4be4e980db

  # - forecaster:
  #     inference_resources:
  #       slurm_partition: normal-shared
  #     checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/2265ae18b04e4470ab89314a85a822ae
  #     label: stage_E_icon_1km_cutoff_edges_KNN_5_dec
  #     steps: 0/120/6
  #     config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml
  #     extra_requirements:
  #       - git+https://github.com/ecmwf/anemoi-inference.git@b9aaee5df86614cad9d8d08b76876a4be4e980db


# Reference forecasts the runs are compared against.
baselines:
  - baseline:
      baseline_id: ICON-CH1-EPS
      label: ICON-CH1-ctrl
      root: /scratch/mch/cmerker/ICON-CH1-EPS
      steps: 0/33/6

  - baseline:
      baseline_id: ICON-CH2-EPS
      label: ICON-CH2-ctrl
      root: /scratch/mch/cmerker/ICON-CH2-EPS
      steps: 0/120/6


# Dataset used as ground truth for verification.
truth:
  label: KENDA-CH1
  root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr

stratification:
  regions:
    - jura
  root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517

dashboard:
  stratification:
    - season

locations:
  output_root: ./output

# Workflow execution profile (executor, resource limits, job batching).
profile:
  executor: slurm
  global_resources:
    gpus: 16
  default_resources:
    slurm_partition: "postproc"
    cpus_per_task: 1
    mem_mb_per_cpu: 1800
    runtime: "1h"
    gpus: 0
  jobs: 50
  batch_rules:
    plot_frame: 32

# Named multi-panel metric-vs-lead-time figures. `panels` is listed in
# row-major order and must contain exactly rows * cols entries.
multipanel_plots:
  bias_overview:
    rows: 2
    cols: 2
    figsize: [12, 8]
    title: "BIAS vs lead time"
    panels:
      - {metric: BIAS, param: T_2M, season: all, title: "T_2M — all"}
      - {metric: BIAS, param: T_2M, season: JJA, title: "T_2M — JJA"}
      - {metric: BIAS, param: PMSL, season: all, title: "PMSL — all"}
      - {metric: BIAS, param: PMSL, season: JJA, title: "PMSL — JJA"}
  rmse_overview:
    rows: 2
    cols: 2
    figsize: [12, 8]
    title: "RMSE vs lead time"
    panels:
      # init_hour -999 is the sentinel for the aggregate over all init hours,
      # so a panel titled for a specific hour must name that hour explicitly.
      # The "00 UTC" panels previously used -999 and showed the aggregate.
      - {metric: RMSE, param: T_2M, init_hour: 0, title: "T_2M — 00 UTC"}
      - {metric: RMSE, param: T_2M, init_hour: 12, title: "T_2M — 12 UTC"}
      - {metric: RMSE, param: PMSL, init_hour: 0, title: "PMSL — 00 UTC"}
      - {metric: RMSE, param: PMSL, init_hour: 12, title: "PMSL — 12 UTC"}
7 changes: 7 additions & 0 deletions resources/report/dashboard/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ document.getElementById("param-select").addEventListener("change", updateChart);
data = JSON.parse(document.getElementById("verif-data").textContent)
header = document.getElementById("header-text").textContent.trim()

// Freeze the source -> color assignment on the complete, sorted list of
// sources found in the data. Toggling sources in the UI then never reshuffles
// colors, and the mapping stays one-to-one. The ordering must agree with
// src/plotting/source_colors.py so the dashboard and the static matplotlib
// figures use the same color per source.
const uniqueSources = new Set(data.map(row => row.source));
const allSources = Array.from(uniqueSources).sort();

// Define base spec
var spec = {
"data": { "values": data },
Expand Down Expand Up @@ -106,6 +112,7 @@ var spec = {
"color": {
"field": "source",
"type": "nominal",
"scale": { "scheme": "tableau10", "domain": allSources },
"legend": { "orient": "top", "title": "Data Source", "offset": 0, "padding": 10 }
},
"shape": {
Expand Down
70 changes: 69 additions & 1 deletion src/evalml/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from typing import Dict, List, Any, ClassVar, FrozenSet

from pydantic import BaseModel, Field, RootModel, field_validator
from pydantic import BaseModel, Field, RootModel, field_validator, model_validator

PROJECT_ROOT = Path(__file__).parents[2]

Expand Down Expand Up @@ -227,6 +227,66 @@ class Stratification(BaseModel):
)


class MultipanelPanelSpec(BaseModel):
    """One panel inside a multi-panel metric-vs-lead-time figure.

    A panel is identified by a metric and a parameter, optionally narrowed to
    one region, season and init hour. Unknown keys are rejected.

    Raises:
        ValidationError: if ``init_hour`` is neither the "all init hours"
            sentinel nor an hour of day in 0..23, if ``ylim`` does not have
            exactly two entries, or if an unknown key is supplied.
    """

    # Sentinel meaning "do not stratify by init hour".
    INIT_HOUR_ALL: ClassVar[int] = -999

    metric: str = Field(..., description="Metric name (e.g. 'rmse').")
    param: str = Field(..., description="Parameter name (e.g. 'T_2M').")
    region: str = Field(
        "all",
        description="Region to subset to. 'all' uses the unstratified aggregate.",
    )
    season: str = Field(
        "all",
        description="Season to subset to. 'all' uses the unstratified aggregate.",
    )
    init_hour: int = Field(
        -999,
        description="Init hour to subset to. -999 (sentinel) uses the unstratified aggregate.",
    )
    title: str | None = Field(
        None,
        description="Panel title. Defaults to '<metric> - <param>'.",
    )
    ylim: List[float] | None = Field(
        None,
        description="Optional [ymin, ymax] for this panel's y-axis.",
        min_length=2,
        max_length=2,
    )

    model_config = {"extra": "forbid"}

    @field_validator("init_hour")
    @classmethod
    def _check_init_hour(cls, value: int) -> int:
        """Accept only the sentinel or a valid hour of day.

        Without this check a typo such as ``25`` passes validation even though
        it is not a valid hour of day.
        """
        if value != cls.INIT_HOUR_ALL and not 0 <= value <= 23:
            raise ValueError(
                f"init_hour must be {cls.INIT_HOUR_ALL} (all init hours) "
                f"or an hour in 0..23, got {value}"
            )
        return value


class MultipanelPlotSpec(BaseModel):
    """Grid layout and panel list for one multi-panel metric-vs-lead-time figure.

    The figure is a ``rows`` x ``cols`` grid. ``panels`` fills it in row-major
    order and must supply exactly one spec per grid cell. Unknown keys are
    rejected.
    """

    model_config = {"extra": "forbid"}

    rows: int = Field(default=..., ge=1, description="Number of subplot rows.")
    cols: int = Field(default=..., ge=1, description="Number of subplot columns.")
    figsize: List[float] | None = Field(
        default=None,
        min_length=2,
        max_length=2,
        description="Optional [width, height] in inches. Defaults to (4.5*cols, 3.5*rows).",
    )
    title: str | None = Field(default=None, description="Optional figure-level title.")
    panels: List[MultipanelPanelSpec] = Field(
        default=...,
        description="Per-panel specs in row-major order. Length must equal rows*cols.",
    )

    @model_validator(mode="after")
    def _check_panel_count(self) -> "MultipanelPlotSpec":
        """Reject layouts whose panel list does not fill the grid exactly."""
        n_panels = len(self.panels)
        grid_cells = self.rows * self.cols
        if n_panels == grid_cells:
            return self
        raise ValueError(
            f"panels has length {n_panels}, expected rows*cols = {grid_cells}"
        )


class Dashboard(BaseModel):
"""Settings for the dashboard"""

Expand Down Expand Up @@ -351,6 +411,14 @@ def validate_threshold_operators(
dashboard: Dashboard
locations: Locations
profile: Profile
multipanel_plots: Dict[str, MultipanelPlotSpec] = Field(
default_factory=dict,
description=(
"Optional named multi-panel metric-vs-lead-time figures. "
"Each entry produces one PNG under results/<experiment>/multipanel/<name>.png "
"when the verification_metrics_multipanel_plot_all target is built."
),
)

model_config = {
"extra": "forbid", # fail on misspelled keys
Expand Down
64 changes: 64 additions & 0 deletions src/plotting/metric_lead_time_panel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Per-axes plotting helper for verification metrics vs. lead time."""

import pandas as pd
from matplotlib.axes import Axes

from verification import decode_metric

from .units import metric_units


def _default_ylabel(metric: str, param: str | None) -> str:
    """Build the default y-axis label for a metric.

    Returns the decoded metric name, followed by ``" [<units>]"`` when a
    parameter is given and ``metric_units`` reports non-empty units for it.
    """
    decoded = decode_metric(metric)
    if param is None:
        return decoded
    unit_text = metric_units(metric, param)
    if not unit_text:
        return decoded
    return f"{decoded} [{unit_text}]"


def plot_panel(
    ax: Axes,
    sub_df: pd.DataFrame,
    *,
    metric: str,
    param: str | None = None,
    title: str | None = None,
    panel_label: str | None = None,
    xlabel: str | None = "Lead Time [h]",
    ylabel: str | None = None,
    show_legend: bool = True,
    color_map: dict[str, str] | None = None,
) -> None:
    """Plot one metric-vs-lead-time panel onto `ax`.

    `sub_df` must already be filtered to a single (metric, param, region, season,
    init_hour) combo and contain at least the columns: source, lead_time, value.
    One line per source is drawn, with points ordered by lead time.

    If `ylabel` is None and `param` is provided, the y-axis label is built as
    "<decoded metric> [<units>]" via plotting.units.metric_units.

    `panel_label` (e.g. "a)") is rendered left-aligned at the same height as
    the centred title.

    If `color_map` is given, each source's line is drawn in
    ``color_map[source]``; sources missing from the map fall back to
    matplotlib's default color cycle. Use ``plotting.source_colors.source_color_map``
    to build a map that matches the dashboard.

    If `sub_df` has no rows, no line is drawn but the title and axis labels
    are still applied, so an empty panel stays identifiable in a figure.

    Args:
        ax: Axes to draw on; modified in place.
        sub_df: Pre-filtered verification data (see above).
        metric: Metric name, used for the default y-axis label.
        param: Parameter name, used to look up units for the default label.
        title: Centred panel title, or None for no title.
        panel_label: Optional left-aligned bold label.
        xlabel: X-axis label; None or "" leaves the x-axis unlabelled.
        ylabel: Y-axis label; None selects the default label.
        show_legend: Whether to draw a legend.
        color_map: Optional ``{source: color}`` mapping.
    """
    if ylabel is None:
        ylabel = _default_ylabel(metric, param)
    colors = color_map or {}
    drew_any = False
    for source, df in sub_df.groupby("source"):
        # Forward `color` only when the map has an entry. An explicit
        # `color=None` can be treated by some pandas versions as a requested
        # color rather than "use the axes' color cycle".
        source_color = colors.get(source)
        color_kwargs = {} if source_color is None else {"color": source_color}
        # Line plots connect points in row order, so order by lead time first.
        df.sort_values("lead_time", kind="stable").plot(
            x="lead_time",
            y="value",
            kind="line",
            marker="o",
            title=title,
            xlabel=xlabel or "",
            ylabel=ylabel or "",
            label=source,
            ax=ax,
            legend=show_legend,
            **color_kwargs,
        )
        drew_any = True
    if not drew_any:
        # Nothing was plotted, so apply the decorations pandas would have set.
        if title:
            ax.set_title(title)
        ax.set_xlabel(xlabel or "")
        ax.set_ylabel(ylabel or "")
    if panel_label:
        ax.set_title(panel_label, loc="left", fontweight="bold")
38 changes: 38 additions & 0 deletions src/plotting/source_colors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Stable source -> color mapping shared with the dashboard.

The dashboard uses Vega-Lite's ``tableau10`` categorical scheme and pins its
``color.scale.domain`` to the alphabetically-sorted full source list so the
mapping stays bijective regardless of dashboard filters. The matplotlib plots
use the same palette and ordering so a given source has the same color in
every figure produced from a verification run.

Both the dashboard and the matplotlib side wrap around when there are more
than ``len(TABLEAU10)`` sources, at which point two sources will share a
color. Switch palettes (e.g. to ``tableau20`` or a deterministic HSV ramp)
if that becomes a problem.
"""

# Vega-Lite "tableau10" scheme:
# https://vega.github.io/vega/docs/schemes/#tableau10
TABLEAU10: list[str] = [
    "#4c78a8",
    "#f58518",
    "#e45756",
    "#72b7b2",
    "#54a24b",
    "#eeca3b",
    "#b279a2",
    "#ff9da6",
    "#9d755d",
    "#bab0ac",
]


def source_color_map(sources) -> dict[str, str]:
    """Assign a palette color to every distinct source.

    Sources are de-duplicated and sorted, then paired with ``TABLEAU10`` in
    that order. The palette is reused from the start once it runs out, so with
    more than ``len(TABLEAU10)`` sources some colors repeat.

    Returns:
        ``{source: hex_color}`` with keys in sorted order.
    """
    palette_size = len(TABLEAU10)
    mapping: dict[str, str] = {}
    for position, name in enumerate(sorted(set(sources))):
        mapping[name] = TABLEAU10[position % palette_size]
    return mapping
26 changes: 26 additions & 0 deletions src/plotting/units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Canonical units for verification parameters and metrics.

Storage units in the verification netCDFs (BIAS, RMSE, MAE, STDE, ... all
inherit these). Update the dict if a parameter's internal representation
changes.
"""

# Units per parameter.
PARAM_UNITS: dict[str, str] = {
    "T_2M": "K",
    "TD_2M": "K",
    "PMSL": "Pa",
    "PS": "Pa",
    "TOT_PREC": "mm",
    "U_10M": "m/s",
    "V_10M": "m/s",
    "SP_10M": "m/s",
}

# Metrics reported without units, whatever the parameter.
UNITLESS_METRICS: set[str] = {"CORR", "R2"}


def metric_units(metric: str, param: str) -> str:
    """Look up the units of a (metric, param) pair.

    The metric name is matched case-insensitively against
    ``UNITLESS_METRICS``. Any other metric takes the units of ``param`` from
    ``PARAM_UNITS``.

    Returns:
        The unit string, or '' for a unitless metric or an unknown parameter.
    """
    is_unitless = metric.upper() in UNITLESS_METRICS
    return "" if is_unitless else PARAM_UNITS.get(param, "")
Loading
Loading