epiforecasts · seabbs-bot · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -78,6 +78,8 @@ export(dispersion_quantile)
 export(dispersion_sample)
 export(dss_sample)
 export(energy_score_multivariate)
+export(filter_scores)
+export(filter_to_intersection)
 export(get_correlations)
 export(get_coverage)
 export(get_duplicate_forecasts)
@@ -88,6 +90,11 @@ export(get_grouping)
 export(get_metrics)
 export(get_pairwise_comparisons)
 export(get_pit_histogram)
+export(impute_mean_score)
+export(impute_missing_scores)
+export(impute_model_score)
+export(impute_na_score)
+export(impute_worst_score)
 export(interval_coverage)
 export(is_forecast)
 export(is_forecast_binary)
@@ -178,6 +185,7 @@ importFrom(data.table,key)
 importFrom(data.table,melt)
 importFrom(data.table,nafill)
 importFrom(data.table,rbindlist)
+importFrom(data.table,set)
 importFrom(data.table,setDT)
 importFrom(data.table,setattr)
 importFrom(data.table,setcolorder)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # scoringutils (development version)
 
+- Added `filter_scores()` and `impute_missing_scores()` for handling missing forecasts before summarisation. `filter_scores()` removes target combinations with insufficient model coverage, while `impute_missing_scores()` fills in missing scores using configurable strategies (worst, mean, NA, or reference model). Both use a strategy function pattern for extensibility. See `vignette("handling-missing-forecasts")` for details (#1122).
 - Added internal S3 generic `get_forecast_type_ids()` so each forecast type declares the columns (beyond the forecast unit) that identify a unique row. `get_duplicate_forecasts()` now uses this instead of hard-coded column names (#888).
 - Removed the deprecated vignettes `Deprecated-functions` and `Deprecated-visualisations`. The code for removed functions (`plot_predictions()`, `make_NA()`, `plot_ranges()`, `plot_score_table()`, `merge_pred_and_obs()`) can still be found in the [git history](https://github.com/epiforecasts/scoringutils/tree/d0cd8e2/vignettes) (#1158).
 

diff --git a/R/filter-scores.R b/R/filter-scores.R
@@ -0,0 +1,166 @@
+#' @title Filter a scores object using a strategy function
+#'
+#' @description
+#' Applies `strategy` to `scores` and returns the rows it keeps,
+#' restoring the class and `metrics` attribute of the input on the
+#' result. Which rows are kept is decided entirely by `strategy`;
+#' this wrapper validates its arguments, reports how many rows were
+#' dropped, and returns `scores` itself when nothing was dropped.
+#'
+#' @param scores An object of class `scores` (a data.table with
+#'   scores and an additional attribute `metrics` as produced
+#'   by [score()]).
+#' @param strategy A function called as
+#'   `strategy(scores, compare = compare)`, e.g. one returned by
+#'   [filter_to_intersection()] (the default).
+#' @param compare Single string (default `"model"`) naming the
+#'   column of `scores` whose values are compared for filtering.
+#'
+#' @return A filtered `scores` object with the same class and
+#'   `metrics` attribute as the input.
+#'
+#' @seealso \code{vignette("handling-missing-forecasts")}
+#' @importFrom cli cli_inform
+#' @importFrom checkmate assert_class assert_character
+#'   assert_function assert_subset
+#' @export
+#' @keywords postprocess-scores
+filter_scores <- function(
+  scores,
+  strategy = filter_to_intersection(),
+  compare = "model"
+) {
+  # Validate inputs before handing over to the strategy
+  assert_class(scores, "scores")
+  assert_character(compare, len = 1)
+  assert_subset(compare, names(scores))
+  assert_function(strategy)
+
+  # Remember what has to be put back on the filtered result
+  input_class <- class(scores)
+  input_metrics <- attr(scores, "metrics")
+
+  result <- strategy(scores, compare = compare)
+
+  # Row counts; these names are interpolated by the messages below
+  n_before <- nrow(scores)
+  n_after <- nrow(result)
+  n_dropped <- n_before - n_after
+
+  if (n_dropped == 0) {
+    cli_inform(c(i = "No rows filtered. Returning scores unchanged."))
+    filtered <- scores
+  } else {
+    cli_inform(c(
+      i = "Filtered out {n_dropped} rows.",
+      i = "{n_after} of {n_before} rows remaining." # nolint: duplicate_argument_linter
+    ))
+    # Put the class and `metrics` attribute of the input back on
+    # whatever the strategy returned
+    class(result) <- input_class
+    data.table::setattr(result, "metrics", input_metrics)
+    filtered <- result
+  }
+
+  return(filtered)
+}
+
+
+#' @title Keep targets covered by enough comparators
+#'
+#' @description
+#' Strategy factory for [filter_scores()]. The returned function
+#' keeps the rows of `scores` whose target combination (forecast
+#' unit without the `compare` column) qualifies, see the arguments.
+#'
+#' @param min_coverage Numeric between 0 and 1 (default `1`).
+#'   Minimum share of the distinct `compare` values that must be
+#'   present for a target combination. Only used if `include` is
+#'   `NULL`.
+#' @param include Character vector or `NULL` (default). If given,
+#'   a target combination qualifies when every listed value of
+#'   the `compare` column has it; values not in `include` are
+#'   kept for those targets as well.
+#'
+#' @return A function with signature `function(scores, compare)`
+#'   suitable for use as a strategy in
+#'   [filter_scores()].
+#'
+#' @importFrom data.table as.data.table setkeyv
+#' @importFrom checkmate assert_number assert_character
+#' @export
+#' @keywords postprocess-scores
+filter_to_intersection <- function(
+  min_coverage = 1,
+  include = NULL
+) {
+  # Arguments are checked once, when the strategy is created
+  assert_number(min_coverage, lower = 0, upper = 1)
+  if (!is.null(include)) {
+    assert_character(include, min.len = 1)
+  }
+
+  # The returned closure captures `min_coverage` and `include`
+  function(scores, compare = "model") {
+    # Convert to a data.table for the grouped and keyed operations below
+    scores <- data.table::as.data.table(scores)
+    # Target columns: the forecast unit without the compared column
+    target_cols <- setdiff(get_forecast_unit(scores), compare)
+
+    if (is.null(include)) {
+      # No `include`: a target qualifies when the share of distinct
+      # `compare` values that have it is at least `min_coverage`
+      n_total <- length(unique(scores[[compare]]))
+
+      target_coverage <- scores[
+        , .(n_covering = data.table::uniqueN(get(compare))),
+        by = target_cols
+      ]
+
+      #nolint start: object_usage_linter
+      qualifying <- target_coverage[
+        n_covering / n_total >= min_coverage,
+        #nolint end
+        target_cols,
+        with = FALSE
+      ]
+    } else {
+      # Every requested value has to exist in the `compare` column
+      unknown <- setdiff(include, unique(scores[[compare]]))
+      if (length(unknown) > 0) {
+        cli::cli_abort(c(
+          "!" = paste0(
+            "{.val {unknown}} not found in ",
+            "{.arg {compare}} column."
+          )
+        ))
+      }
+
+      # Distinct targets of each requested value
+      targets_by_value <- lapply(include, function(comparator) {
+        unique(
+          scores[scores[[compare]] == comparator, target_cols, with = FALSE]
+        )
+      })
+
+      # Fold the per-value tables with inner joins so only targets
+      # shared by all requested values remain
+      qualifying <- Reduce(
+        function(shared, candidate) {
+          data.table::setkeyv(shared, target_cols)
+          data.table::setkeyv(candidate, target_cols)
+          merge(shared, candidate, by = target_cols)
+        },
+        targets_by_value
+      )
+    }
+
+    # Semi-join: key both tables on the target columns and keep the
+    # rows of `scores` that match a qualifying target
+    data.table::setkeyv(scores, target_cols)
+    data.table::setkeyv(qualifying, target_cols)
+    result <- scores[qualifying, nomatch = NULL]
+
+    return(result)
+  }
+}
diff --git a/R/get-protected-columns.R b/R/get-protected-columns.R
@@ -15,6 +15,7 @@ get_protected_columns <- function(data = NULL) {
 
   protected_columns <- c(
     ".mv_group_id",
+    ".imputed",
     "predicted", "observed", "sample_id", "quantile_level", "upper", "lower",
     "pit_value", "interval_range", "boundary", "predicted_label",
     "interval_coverage", "interval_coverage_deviation",