Skip to content

Commit 19ccc1b

Browse files
kstawiski and claude committed
feat: Add safe-preprocessing and safe-evaluation utilities
Methodology review of miRPOC OC miRNA meta-analysis (2026-04-02) identified recurring bugs in user analysis scripts that OmicSelector should prevent at the framework level: safe-preprocessing.R: - impute_within_fold(): Enforces training-only median imputation within CV folds, preventing the common global pre-loop imputation leak (found in 10 analysis scripts) - check_no_premature_imputation(): Detects if a matrix was already imputed before the CV loop started - standardize_within_fold(): Training-only centering and scaling safe-evaluation.R: - safe_roc(): Wrapper around pROC::roc() that defaults to direction="auto" and warns on hardcoded direction (prevents the AUC inversion bug caused by direction="<" with class-weighted models where benign scores > OC scores) - safe_auc(): Convenience wrapper returning AUC + DeLong CI - test_noninferiority(): Paired DeLong-based non-inferiority test (prevents wrong reference AUC and informal CI-check substitutes) - check_null_benchmark_draws(): Warns when <500 draws used for 95th percentile null estimates Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5ccb835 commit 19ccc1b

3 files changed

Lines changed: 419 additions & 0 deletions

File tree

NAMESPACE

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ export(cache_stats)
4040
export(cached_filter)
4141
export(calibration_summary)
4242
export(check_batch_correction_leakage)
43+
export(check_no_premature_imputation)
44+
export(check_null_benchmark_draws)
4345
export(check_dl_availability)
4446
export(check_feature_correlations)
4547
export(clear_cache)
@@ -66,6 +68,7 @@ export(export_onnx)
6668
export(export_vetiver)
6769
export(extract_features_from_resample)
6870
export(extract_selected_features)
71+
export(impute_within_fold)
6972
export(feature_importance)
7073
export(finetune_mlr3torch_checkpoint)
7174
export(finetune_omicfit_checkpoint)
@@ -113,12 +116,16 @@ export(reliability_diagram_data)
113116
export(reset_parallel)
114117
export(run_bayesian_benchmark)
115118
export(run_dl_benchmark)
119+
export(safe_auc)
120+
export(safe_roc)
121+
export(standardize_within_fold)
116122
export(select_best_signature)
117123
export(setup_parallel)
118124
export(shap_values)
119125
export(smote_augment)
120126
export(stack_omics)
121127
export(tabddpm_generate)
128+
export(test_noninferiority)
122129
export(validate_omics_input)
123130
export(validate_synthetic)
124131
export(with_parallel)

R/safe-evaluation.R

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
#' @title Safe Evaluation Utilities for Biomarker Validation
2+
#'
3+
#' @description
4+
#' Functions that enforce correct ROC computation and non-inferiority testing.
5+
#' These utilities prevent the most common evaluation bugs:
6+
#' hardcoded ROC direction (inverting AUC), wrong non-inferiority references,
7+
#' and unstable percentile estimates from insufficient random draws.
8+
#'
9+
#' @name safe-evaluation
10+
NULL
11+
12+
13+
#' @title Safe ROC Computation (Direction-Guarded)
#'
#' @description
#' Thin wrapper around \code{pROC::roc()} that keeps \code{direction = "auto"}
#' as the default and emits a loud warning whenever a caller hardcodes it.
#'
#' **Common bug this prevents**: Hardcoding \code{direction = "<"} when the
#' predictor's score direction depends on class weights, calibration, or
#' model type. With extreme class weighting (e.g., 17x), the positive class
#' can have LOWER predicted probabilities, inverting the AUC to 1 - true_AUC.
#'
#' @param response Binary outcome vector (0/1 or factor)
#' @param predictor Numeric prediction scores
#' @param direction ROC direction. Default \code{"auto"} (strongly recommended).
#'   If set to \code{"<"} or \code{">"}, a warning is issued.
#' @param levels Two-element vector specifying the levels of the response.
#'   Default \code{c(0, 1)}.
#' @param quiet Logical; suppress pROC messages. Default TRUE.
#' @param ... Additional arguments passed to \code{pROC::roc()}
#'
#' @return A pROC roc object
#'
#' @examples
#' \dontrun{
#' roc_obj <- safe_roc(y_true, y_pred)
#' auc_val <- as.numeric(pROC::auc(roc_obj))
#' }
#'
#' @export
safe_roc <- function(response, predictor, direction = "auto",
                     levels = c(0, 1), quiet = TRUE, ...) {

  # pROC is a hard requirement for any ROC work in this module.
  if (!requireNamespace("pROC", quietly = TRUE)) {
    stop("Package 'pROC' is required. Install with: install.packages('pROC')", call. = FALSE)
  }

  # Anything other than "auto" is a deliberate override; warn so the caller
  # knows they own the risk of an inverted AUC.
  if (!identical(direction, "auto")) {
    override_msg <- paste0(
      sprintf("Hardcoded direction='%s' in ROC computation. ", direction),
      "This can INVERT the AUC when class weights or calibration change the ",
      "score direction. Use direction='auto' unless you have verified the ",
      "predictor's score polarity. See: OmicSelector methodology audit, ",
      "pROC direction bug (miRPOC 2026-04-02)."
    )
    warning(override_msg, call. = FALSE)
  }

  pROC::roc(
    response = response,
    predictor = predictor,
    direction = direction,
    levels = levels,
    quiet = quiet,
    ...
  )
}
72+
73+
74+
#' @title Safe AUC with Confidence Interval
#'
#' @description
#' Computes the AUC plus a DeLong (or bootstrap) confidence interval via
#' \code{safe_roc()}, returning everything in one named list.
#'
#' @param response Binary outcome vector (0/1)
#' @param predictor Numeric prediction scores
#' @param ci_method CI method: "delong" (default) or "bootstrap"
#' @param ... Additional arguments passed to \code{safe_roc()}
#'
#' @return A list with:
#' \item{auc}{Numeric AUC value}
#' \item{ci_lower}{Lower bound of 95\% CI}
#' \item{ci_upper}{Upper bound of 95\% CI}
#' \item{roc}{The pROC roc object}
#' \item{direction}{Direction used by pROC}
#'
#' @export
safe_auc <- function(response, predictor, ci_method = "delong", ...) {

  roc_fit <- safe_roc(response, predictor, ...)
  # pROC::ci.auc returns c(lower, auc, upper); elements 1 and 3 are the bounds.
  ci_vals <- pROC::ci.auc(roc_fit, method = ci_method)

  list(
    auc = as.numeric(pROC::auc(roc_fit)),
    ci_lower = as.numeric(ci_vals[1]),
    ci_upper = as.numeric(ci_vals[3]),
    roc = roc_fit,
    direction = roc_fit$direction
  )
}
107+
108+
109+
#' @title Paired DeLong Non-Inferiority Test
#'
#' @description
#' Tests whether a candidate model's AUC is non-inferior to a reference AUC,
#' using a paired DeLong test on the same predictions.
#'
#' **Common bugs this prevents**:
#' \enumerate{
#'   \item Using the wrong reference AUC (e.g., superseded Phase 2.1 instead of
#'     current Phase 2.1b)
#'   \item Using an unpaired CI check instead of a formal paired DeLong test
#'   \item Comparing AUCs from different sample denominators
#' }
#'
#' @param response Binary outcome vector (0/1), same for both models
#' @param predictor_candidate Candidate model predictions
#' @param predictor_reference Reference model predictions. Must be on the
#'   SAME samples as predictor_candidate.
#' @param delta Non-inferiority margin (positive value, e.g., 0.03).
#'   Candidate is non-inferior if its AUC is within delta of the reference.
#' @param alpha Significance level for the one-sided test (default 0.05).
#'   Must lie strictly between 0 and 1.
#'
#' @return A list with:
#' \item{non_inferior}{Logical: TRUE if candidate is non-inferior}
#' \item{auc_candidate}{Candidate AUC}
#' \item{auc_reference}{Reference AUC}
#' \item{delta_auc}{Candidate - Reference AUC}
#' \item{delta}{Non-inferiority margin used}
#' \item{se_diff}{Standard error of AUC difference (paired DeLong)}
#' \item{z_stat}{Z-statistic for non-inferiority}
#' \item{p_value}{One-sided p-value}
#' \item{ci_lower_diff}{One-sided lower confidence bound (level 1 - alpha)
#'   for the AUC difference; with alpha = 0.05 this uses z = qnorm(0.95),
#'   not the two-sided 95\% multiplier}
#'
#' @examples
#' \dontrun{
#' result <- test_noninferiority(
#'   response = y_test,
#'   predictor_candidate = pred_reduced_panel,
#'   predictor_reference = pred_full_panel,
#'   delta = 0.03
#' )
#' cat("Non-inferior:", result$non_inferior, "\n")
#' }
#'
#' @export
test_noninferiority <- function(response, predictor_candidate, predictor_reference,
                                delta = 0.03, alpha = 0.05) {

  if (!requireNamespace("pROC", quietly = TRUE)) {
    stop("Package 'pROC' is required.", call. = FALSE)
  }

  # Pairing is only valid when all three vectors cover the same samples.
  if (length(response) != length(predictor_candidate) ||
      length(response) != length(predictor_reference)) {
    stop("response, predictor_candidate, and predictor_reference must have equal length",
         call. = FALSE)
  }

  if (delta <= 0) {
    stop("delta must be positive (e.g., 0.03)", call. = FALSE)
  }

  if (!is.numeric(alpha) || length(alpha) != 1L || alpha <= 0 || alpha >= 1) {
    stop("alpha must be a single value strictly between 0 and 1", call. = FALSE)
  }

  # Compute ROC objects (direction = auto for both, via safe_roc)
  roc_cand <- safe_roc(response, predictor_candidate)
  roc_ref <- safe_roc(response, predictor_reference)

  auc_cand <- as.numeric(pROC::auc(roc_cand))
  auc_ref <- as.numeric(pROC::auc(roc_ref))
  delta_auc <- auc_cand - auc_ref

  # Paired DeLong comparison; its statistic is (AUC1 - AUC2) / SE, so the
  # paired SE of the difference can be recovered as |diff| / |statistic|.
  delong_test <- pROC::roc.test(roc_ref, roc_cand, method = "delong", paired = TRUE)

  se_diff <- abs(delta_auc / as.numeric(delong_test$statistic))
  if (!is.finite(se_diff) || se_diff == 0) {
    # Fallback: back out SE from the two-sided p-value
    # (p = 2 * P(Z > |diff|/SE)  =>  SE = |diff| / |qnorm(p/2)|).
    se_diff <- abs(delta_auc) / abs(qnorm(delong_test$p.value / 2))
    # Identical AUCs (diff == 0) leave the SE unrecoverable from either route;
    # mark it NA explicitly so the degenerate branch below is taken.
    if (!is.finite(se_diff) || se_diff == 0) se_diff <- NA_real_
  }

  # Non-inferiority test: H0: AUC_cand - AUC_ref <= -delta
  #                       H1: AUC_cand - AUC_ref >  -delta
  #                       Z = (delta_auc + delta) / SE, reject for large Z.
  if (is.finite(se_diff) && se_diff > 0) {
    z_stat <- (delta_auc + delta) / se_diff
    p_value <- pnorm(z_stat, lower.tail = FALSE) # one-sided upper tail
    # One-sided lower bound at level 1 - alpha, matching the test above.
    ci_lower_diff <- delta_auc - qnorm(1 - alpha) * se_diff
  } else {
    z_stat <- NA_real_
    p_value <- NA_real_
    ci_lower_diff <- NA_real_
  }

  non_inferior <- !is.na(p_value) && p_value < alpha

  list(
    non_inferior = non_inferior,
    auc_candidate = auc_cand,
    auc_reference = auc_ref,
    delta_auc = delta_auc,
    delta = delta,
    se_diff = as.numeric(se_diff),
    z_stat = as.numeric(z_stat),
    p_value = as.numeric(p_value),
    ci_lower_diff = as.numeric(ci_lower_diff)
  )
}
223+
224+
225+
#' @title Validate Random-Panel Null Benchmark
#'
#' @description
#' Checks whether the number of random draws is sufficient for a stable
#' percentile estimate. Warns when fewer than \code{ceiling(20 / (1 - percentile))}
#' draws are used (400 for the default 95th percentile).
#'
#' @param n_draws Number of random panel draws performed (single
#'   non-negative number)
#' @param percentile The percentile being estimated (e.g., 0.95).
#'   Must lie strictly between 0 and 1.
#'
#' @return Invisible TRUE if adequate, FALSE with warning otherwise
#'
#' @export
check_null_benchmark_draws <- function(n_draws, percentile = 0.95) {
  # Validate inputs up front so a bad call fails loudly instead of producing
  # a nonsensical threshold.
  if (!is.numeric(n_draws) || length(n_draws) != 1L || is.na(n_draws) || n_draws < 0) {
    stop("n_draws must be a single non-negative number", call. = FALSE)
  }
  if (!is.numeric(percentile) || length(percentile) != 1L ||
      is.na(percentile) || percentile <= 0 || percentile >= 1) {
    stop("percentile must be a single value strictly between 0 and 1", call. = FALSE)
  }

  # Order statistic SE approximation: want >= ~20 draws beyond the estimated
  # quantile, i.e. n * (1 - percentile) >= 20  =>  ~400 for the 95th pctile.
  min_recommended <- ceiling(20 / (1 - percentile))

  if (n_draws < min_recommended) {
    warning(
      sprintf(
        "Only %d draws for %.0fth percentile estimate (recommend >= %d). ",
        n_draws, percentile * 100, min_recommended
      ),
      "The Monte Carlo error on this order statistic is non-trivial. ",
      "Consider rerunning with more draws or reporting a bootstrap CI ",
      "on the percentile.",
      call. = FALSE
    )
    return(invisible(FALSE))
  }
  invisible(TRUE)
}

0 commit comments

Comments
 (0)