Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions R/Lrnr_density_semiparametric.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,18 @@ Lrnr_density_semiparametric <- R6Class(
min_obs_error <- 2 * min(se_task$Y)
var_fit <- var_learner$train(se_task)
var_preds <- var_fit$predict()
var_preds[var_preds < 0] <- min_obs_error
sd_preds <- sqrt(var_preds)

# calculate a practical floor for error variance, called squared tolerance
errors_sd <- sd(errors)
tol2 <- (0.1 * errors_sd)^2

# replace any NA or too-small var_pred with tol2
var_preds_clean <- ifelse(
is.na(var_preds) | var_preds < 0,
tol2,
var_preds
)
sd_preds <- sqrt(var_preds_clean)
errors <- errors / sd_preds
} else {
var_fit <- NULL
Expand Down
106 changes: 106 additions & 0 deletions tests/testthat/test-density-semiparametric.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,109 @@ test_that("Lrnr_density_semiparametric works", {
# x_samp <- sample(x_grid, 20)
# ggplot(long[x%in%x_samp],aes(x=y,y=value,color=variable))+geom_line()+facet_wrap(~round(x,5),scales="free_x")+theme_bw()+coord_flip()
})


# Densities should integrate to 1 ---
#
#
# the core logic of this test is that a (conditional) density model fit by
# Lrnr_density_semiparametric (given predictors) should integrate to 1 as it
# should be an estimate of a probability density function.
#
#
test_that("Lrnr_density_semiparametric produces densities that integrate to 1", {

# fit a heteroskedastic density model (using glm for the conditional mean
# component, and glm for the conditional variance component), fix a vector of
# covariates, and integrate the density across a region of plausible outcome
# values.
#
# this is in a function so we can repeat the test on at least two different
# datasets.
fit_a_heteroskedastic_density_model_and_integrate_area_under_density_curve <-
function(
df,
covariates,
outcome) {

task <- sl3_Task$new(
df,
covariates = covariates,
outcome = outcome)

heteroskedastic_glm_glm_sl3 <- Lrnr_density_semiparametric$new(
mean_learner = make_learner(Lrnr_glm),
var_learner = make_learner(Lrnr_glm)
)

Lrnr_glm_heteroskedastic_fit <- heteroskedastic_glm_glm_sl3$train(task)
Lrnr_glm_heteroskedastic_predictor <- Lrnr_glm_heteroskedastic_fit$predict


f_heteroskedastic_density <- function(ys) {
# take the first row to use to fix all the covariates
x <- df[1,,drop=FALSE]
# replicate it for each point at which we will evaluate it to integrate it
newdata <- dplyr::bind_rows(replicate(n = length(ys), expr = x, simplify = FALSE))
# replace the outcome with each of the values to evaluate the density at
newdata[outcome] <- ys

new_task <- sl3::sl3_Task$new(
data = newdata,
covariates = covariates,
outcome = outcome)

Lrnr_glm_heteroskedastic_predictor(new_task)
}

# setup a range across which we will integrate
lower_than_min <- min(df[[outcome]]) - 10*sd(df[[outcome]])
higher_than_max <- max(df[[outcome]]) + 10*sd(df[[outcome]])

# plotting code for a sanity/face-value check:
# outcome_values <- seq(lower_than_min, higher_than_max, length.out = 10000)
# density_values <- f_heteroskedastic_density(outcome_values)
# plot(outcome_values, density_values, type = 'l')

# integrate the density given the covariates in the first row of df
integrated_area_under_density_curve <-
integrate(f_heteroskedastic_density,
lower = lower_than_min,
upper = higher_than_max)

return(integrated_area_under_density_curve$value)
}

# the following test fits a conditional density model to the mtcars dataset
# using hp as the outcome and the other continuous variables
# as the covariates.

mtcars_integrated_area_under_density <-
fit_a_heteroskedastic_density_model_and_integrate_area_under_density_curve(
df = mtcars,
covariates = c('mpg', 'disp', 'drat', 'wt', 'qsec'),
outcome = 'hp'
)

testthat::expect_gt(mtcars_integrated_area_under_density, .9)
testthat::expect_lt(mtcars_integrated_area_under_density, 1.1)

# the following test fits a conditional density model to the MASS::Boston
# dataset using medv as the outcome and other continuous variables as the
# covariates.
#
# sl3 imports ggplot2 and ggplot2 imports MASS so no additional dependency
# is added by relying on MASS::Boston in this test.
Boston_integrated_area_under_density <-
fit_a_heteroskedastic_density_model_and_integrate_area_under_density_curve(
df = MASS::Boston,
covariates = c('crim', 'indus', 'nox', 'rm', 'age', 'dis', 'tax'),
outcome = 'medv'
)

testthat::expect_gt(Boston_integrated_area_under_density, .9)
testthat::expect_lt(Boston_integrated_area_under_density, 1.1)


})