diff --git a/docs/src/api.md b/docs/src/api.md index 7592581..7145ec6 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -27,7 +27,6 @@ eti! AbstractELPDResult PSISLOOResult elpd_estimates -information_criterion loo ``` @@ -36,12 +35,10 @@ loo ```@docs ModelComparisonResult compare -model_weights ``` The following model weighting methods are available ```@docs -AbstractModelWeightsMethod BootstrappedPseudoBMA PseudoBMA Stacking diff --git a/src/PosteriorStats.jl b/src/PosteriorStats.jl index d21b8e9..5db5a3f 100644 --- a/src/PosteriorStats.jl +++ b/src/PosteriorStats.jl @@ -32,10 +32,10 @@ export PSIS, PSISResult, psis, psis! # LOO-CV export AbstractELPDResult, PSISLOOResult -export elpd_estimates, information_criterion, loo +export elpd_estimates, loo # Model weighting and comparison -export AbstractModelWeightsMethod, BootstrappedPseudoBMA, PseudoBMA, Stacking, model_weights +export BootstrappedPseudoBMA, PseudoBMA, Stacking export ModelComparisonResult, compare # Summary statistics @@ -49,7 +49,6 @@ export eti, eti!, hdi, hdi! export loo_pit, r2_score const DEFAULT_CI_PROB = 0.89f0 -const INFORMATION_CRITERION_SCALES = (deviance=-2, log=1, negative_log=-1) include("utils.jl") include("show_prettytable.jl") diff --git a/src/compare.jl b/src/compare.jl index 24755f3..57426bc 100644 --- a/src/compare.jl +++ b/src/compare.jl @@ -14,8 +14,15 @@ The ELPD is estimated by Pareto smoothed importance sampling leave-one-out cross # Keywords - - `weights_method::AbstractModelWeightsMethod=Stacking()`: the method to be used to weight - the models. See [`model_weights`](@ref) for details + - `method=Stacking()`: the method to be used to weight the models + (see [Yao2018](@citet) for details): + + [`Stacking`](@ref): Stacking of predictive distributions. The default and recommended + approach, as it performs well even when the true data generating process is not + included among the candidate models. + + [`BootstrappedPseudoBMA`](@ref): pseudo-Bayesian Model averaging using Akaike-type + weighting, where the weights are stabilized using the Bayesian bootstrap. + + [`PseudoBMA`](@ref): pseudo-Bayesian Model averaging using Akaike-type weighting + (not recommended). - `sort::Bool=true`: Whether to sort models by decreasing ELPD. # Returns @@ -54,7 +61,7 @@ Compare the same models from pre-computed PSIS-LOO results and computing ```jldoctest compare; setup = :(using Random; Random.seed!(23)) julia> elpd_results = mc.elpd_result; -julia> compare(elpd_results; weights_method=BootstrappedPseudoBMA()) +julia> compare(elpd_results; method=BootstrappedPseudoBMA()) ModelComparisonResult with BootstrappedPseudoBMA weights rank elpd se_elpd elpd_diff se_elpd_diff weight p se_p non_centered 1 -31 1.5 0 0.0 0.51 0.9 0.32 @@ -64,17 +71,18 @@ ModelComparisonResult with BootstrappedPseudoBMA weights # References - [Spiegelhalter2002](@cite) Spiegelhalter et al. J. R. Stat. Soc. B 64 (2002) +- [Yao2018](@cite) Yao et al. Bayesian Analysis 13, 3 (2018) """ function compare( inputs; - weights_method::AbstractModelWeightsMethod=Stacking(), + method::AbstractModelWeightsMethod=Stacking(), model_names=_indices(inputs), sort::Bool=true, ) length(model_names) === length(inputs) || throw(ArgumentError("Length of `model_names` must match length of `inputs`")) elpd_results = map(_maybe_loo, inputs) - weights = model_weights(weights_method, elpd_results) + weights = model_weights(method, elpd_results) perm = _sortperm(elpd_results; by=x -> elpd_estimates(x).elpd, rev=true) i_elpd_max = first(perm) elpd_max_i = elpd_estimates(elpd_results[i_elpd_max]; pointwise=true).elpd @@ -89,7 +97,7 @@ function compare( se_elpd_diff = map(last, se_elpd_diff_and) rank = _assimilar(elpd_results, (1:length(elpd_results))[perm]) result = ModelComparisonResult( - model_names, rank, elpd_diff, se_elpd_diff, weights, elpd_results, weights_method + model_names, rank, elpd_diff, se_elpd_diff, weights, elpd_results, method ) sort || return result return _permute(result, perm) @@ -117,13 +125,13 @@ struct ModelComparisonResult{E,N,R,W,ER,M} elpd_diff::E "Standard error of the ELPD difference" se_elpd_diff::E - "Model weights computed with `weights_method`" + "Model weights computed with `method`" weight::W """`AbstactELPDResult`s for each model, which can be used to access useful stats like ELPD estimates, pointwise estimates, and Pareto shape values for PSIS-LOO""" elpd_result::ER - "Method used to compute model weights with [`model_weights`](@ref)" - weights_method::M + "Method used to compute model weights" + method::M end #### custom tabular show methods @@ -140,7 +148,7 @@ function _show(io::IO, mime::MIME, r::ModelComparisonResult; kwargs...) cols = Tables.columnnames(r)[2:end] table = NamedTuple{cols}(Tables.columntable(r)) - weights_method_name = _typename(r.weights_method) + method_name = _typename(r.method) weights = table.weight digits_weights = ceil(Int, -log10(maximum(weights))) + 1 weight_formatter = PrettyTables.ft_printf( @@ -150,7 +158,7 @@ function _show(io::IO, mime::MIME, r::ModelComparisonResult; kwargs...) io, mime, table; - title="ModelComparisonResult with $(weights_method_name) weights", + title="ModelComparisonResult with $(method_name) weights", row_labels, extra_formatters=(weight_formatter,), kwargs..., @@ -160,7 +168,7 @@ end function _permute(r::ModelComparisonResult, perm) return ModelComparisonResult( (_permute(getfield(r, k), perm) for k in fieldnames(typeof(r))[1:(end - 1)])..., - r.weights_method, + r.method, ) end diff --git a/src/elpdresult.jl b/src/elpdresult.jl index c36f830..e6fd860 100644 --- a/src/elpdresult.jl +++ b/src/elpdresult.jl @@ -9,7 +9,6 @@ each, from which other relevant estimates can be computed. Subtypes implement the following functions: - [`elpd_estimates`](@ref) -- [`information_criterion`](@ref) """ abstract type AbstractELPDResult end @@ -29,35 +28,6 @@ Return the (E)LPD estimates from the `result`. """ function elpd_estimates end -""" - $(FUNCTIONNAME)(elpd, scale::Symbol) - -Compute the information criterion for the given `scale` from the `elpd` estimate. - -`scale` must be one of `$(keys(INFORMATION_CRITERION_SCALES))`. - -See also: [`loo`](@ref) -""" -function information_criterion(estimates, scale::Symbol) - scale_value = INFORMATION_CRITERION_SCALES[scale] - return scale_value * estimates.elpd -end - -""" - $(FUNCTIONNAME)(result::AbstractELPDResult, scale::Symbol; pointwise=false) - -Compute information criterion for the given `scale` from the existing ELPD `result`. - -`scale` must be one of `$(keys(INFORMATION_CRITERION_SCALES))`. - -If `pointwise=true`, then pointwise estimates are returned. -""" -function information_criterion( - result::AbstractELPDResult, scale::Symbol; pointwise::Bool=false -) - return information_criterion(elpd_estimates(result; pointwise), scale) -end - function _lpd_pointwise(log_likelihood, dims) ndraws = prod(Base.Fix1(size, log_likelihood), dims) lpd = LogExpFunctions.logsumexp(log_likelihood; dims) diff --git a/src/loo.jl b/src/loo.jl index 94010e4..144ada1 100644 --- a/src/loo.jl +++ b/src/loo.jl @@ -35,6 +35,13 @@ end Compute the Pareto-smoothed importance sampling leave-one-out cross-validation (PSIS-LOO). [Vehtari2017, LOOFAQ](@cite) +!!! details Relationship to information criteria + + PSIS-LOO estimates the expected log pointwise predictive density (ELPD) and effective + number of parameters (p). The ELPD can be converted to the "deviance" scale sometimes + used for information criteria by multiplying it by `-2`. See [LOOFAQ](@citet) for more + details. + `log_likelihood` must be an array of log-likelihood values with shape `(chains, draws[, params...])`. @@ -46,7 +53,9 @@ Compute the Pareto-smoothed importance sampling leave-one-out cross-validation ( [`MCMCDiagnosticTools.ess`](@extref). - `kwargs`: Remaining keywords are forwarded to [`PSIS.psis`](@extref). -See also: [`PSISLOOResult`](@ref) +# Returns + + - [`PSISLOOResult`](@ref): A container for the PSIS-LOO results. # Examples diff --git a/src/model_weights.jl b/src/model_weights.jl index 84acb4c..8c22ce1 100644 --- a/src/model_weights.jl +++ b/src/model_weights.jl @@ -9,64 +9,6 @@ Subtypes implement [`model_weights`](@ref)`(method, elpd_results)`. """ abstract type AbstractModelWeightsMethod end -""" - model_weights(elpd_results; method=Stacking()) - model_weights(method::AbstractModelWeightsMethod, elpd_results) - -Compute weights for each model in `elpd_results` using `method`. - -`elpd_results` is a `Tuple`, `NamedTuple`, or `AbstractVector` with -[`AbstractELPDResult`](@ref) entries. The weights are returned in the same type of -collection. - -[`Stacking`](@ref) is the recommended approach, as it performs well even when the true data -generating process is not included among the candidate models. See [Yao2018](@citet) for -details. - -See also: [`AbstractModelWeightsMethod`](@ref), [`compare`](@ref) - -# Examples - -Compute [`Stacking`](@ref) weights for two models: - -```jldoctest model_weights; filter = [r"└.*", r"(\\d+\\.\\d{3})\\d*" => s"\\1"] -julia> using ArviZExampleData - -julia> models = ( - centered=load_example_data("centered_eight"), - non_centered=load_example_data("non_centered_eight"), - ); - -julia> elpd_results = map(models) do idata - log_like = PermutedDimsArray(idata.log_likelihood.obs, (2, 3, 1)) - return loo(log_like) - end; -┌ Warning: 1 parameters had Pareto shape values 0.7 < k ≤ 1. Resulting importance sampling estimates are likely to be unstable. -└ @ PSIS ~/.julia/packages/PSIS/... - -julia> model_weights(elpd_results; method=Stacking()) |> pairs -pairs(::NamedTuple) with 2 entries: - :centered => 3.50546e-31 - :non_centered => 1.0 -``` - -Now we compute [`BootstrappedPseudoBMA`](@ref) weights for the same models: - -```jldoctest model_weights; setup = :(using Random; Random.seed!(94)) -julia> model_weights(elpd_results; method=BootstrappedPseudoBMA()) |> pairs -pairs(::NamedTuple) with 2 entries: - :centered => 0.492513 - :non_centered => 0.507487 -``` - -# References - -- [Yao2018](@cite) Yao et al. Bayesian Analysis 13, 3 (2018) -""" -function model_weights(elpd_results; method::AbstractModelWeightsMethod=Stacking()) - return model_weights(method, elpd_results) -end - # Akaike-type weights are defined as exp(-AIC/2), normalized to 1, which on the log-score # IC scale is equivalent to softmax akaike_weights!(w, elpds) = LogExpFunctions.softmax!(w, elpds) diff --git a/test/compare.jl b/test/compare.jl index 8045cc2..72c7a72 100644 --- a/test/compare.jl +++ b/test/compare.jl @@ -29,7 +29,7 @@ end @test mc1.elpd_diff.non_centered == 0.0 @test mc1.elpd_diff.centered > 0 @test mc1.weight == NamedTuple{(:non_centered, :centered)}( - model_weights(eight_schools_loo_results) + PosteriorStats.model_weights(Stacking(), eight_schools_loo_results) ) @test mc1.elpd_result == NamedTuple{(:non_centered, :centered)}(eight_schools_loo_results) @@ -41,11 +41,11 @@ end end @testset "keywords are forwarded" begin - mc2 = compare(eight_schools_loo_results; weights_method=PseudoBMA()) + mc2 = compare(eight_schools_loo_results; method=PseudoBMA()) @test !_isequal(mc2, compare(eight_schools_loo_results)) - @test mc2.weights_method === PseudoBMA() + @test mc2.method === PseudoBMA() mc3 = compare(eight_schools_loo_results; sort=false) - for k in filter(!=(:weights_method), propertynames(mc1)) + for k in filter(!=(:method), propertynames(mc1)) if k === :name @test getproperty(mc3, k) == reverse(getproperty(mc1, k)) else @@ -98,7 +98,7 @@ end end @testset "show" begin - mc5 = compare(eight_schools_loo_results; weights_method=PseudoBMA()) + mc5 = compare(eight_schools_loo_results; method=PseudoBMA()) @test sprint(show, "text/plain", mc1) == """ ModelComparisonResult with Stacking weights rank elpd se_elpd elpd_diff se_elpd_diff weight p se_p diff --git a/test/loo.jl b/test/loo.jl index 0fee532..fc12dba 100644 --- a/test/loo.jl +++ b/test/loo.jl @@ -32,17 +32,6 @@ using Test @test loo_result.psis_result.reff == pointwise.reff @test loo_result.psis_result.pareto_shape == pointwise.pareto_shape end - @testset "information criterion" begin - @test information_criterion(loo_result, :log) == estimates.elpd - @test information_criterion(loo_result, :negative_log) == -estimates.elpd - @test information_criterion(loo_result, :deviance) == -2 * estimates.elpd - @test information_criterion(loo_result, :log; pointwise=true) == - pointwise.elpd - @test information_criterion(loo_result, :negative_log; pointwise=true) == - -pointwise.elpd - @test information_criterion(loo_result, :deviance; pointwise=true) == - -2 * pointwise.elpd - end end end # @testset "keywords forwarded" begin diff --git a/test/model_weights.jl b/test/model_weights.jl index 4057fc7..c555639 100644 --- a/test/model_weights.jl +++ b/test/model_weights.jl @@ -12,17 +12,23 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end function test_model_weights(weights_method) @testset "weights are same collection as arguments" begin elpd_results_tuple = map(loo, (randn(1000, 4, 2, 3), randn(1000, 4, 2, 3))) - weights_tuple = @inferred model_weights(weights_method(), elpd_results_tuple) + weights_tuple = @inferred PosteriorStats.model_weights( + weights_method(), elpd_results_tuple + ) @test weights_tuple isa NTuple{2,Float64} @test sum(weights_tuple) ≈ 1 elpd_results_nt = NamedTuple{(:x, :y)}(elpd_results_tuple) - weights_nt = @inferred model_weights(weights_method(), elpd_results_nt) + weights_nt = @inferred PosteriorStats.model_weights( + weights_method(), elpd_results_nt + ) @test weights_nt isa NamedTuple{(:x, :y),NTuple{2,Float64}} @test _isapprox(values(weights_nt), weights_tuple) elpd_results_da = OffsetVector(collect(elpd_results_tuple), 0:1) - weights_da = @inferred model_weights(weights_method(), elpd_results_da) + weights_da = @inferred PosteriorStats.model_weights( + weights_method(), elpd_results_da + ) @test weights_da isa OffsetVector @test axes(weights_da) == axes(elpd_results_da) @test collect(weights_da) ≈ collect(weights_tuple) @@ -32,8 +38,8 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end elpd_results = map( loo, (randn(1000, 4, 10), randn(1000, 4, 10), randn(1000, 4, 10)) ) - weights1 = model_weights(weights_method(), elpd_results) - weights2 = model_weights(weights_method(), reverse(elpd_results)) + weights1 = PosteriorStats.model_weights(weights_method(), elpd_results) + weights2 = PosteriorStats.model_weights(weights_method(), reverse(elpd_results)) T = eltype(weights1) @test _isapprox(weights1, reverse(weights2); atol=sqrt(eps(T))) end @@ -42,7 +48,7 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end ll = randn(1000, 4, 10) result = loo(ll) elpd_results = fill(result, 3) - weights = model_weights(weights_method(), elpd_results) + weights = PosteriorStats.model_weights(weights_method(), elpd_results) @test sum(weights) ≈ 1 @test weights ≈ fill(weights[1], length(weights)) end @@ -50,7 +56,7 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end @testset "better model gets higher weight" begin data = log_likelihood_eight_schools() elpd_results = map(loo, data) - weights = model_weights(weights_method(), elpd_results) + weights = PosteriorStats.model_weights(weights_method(), elpd_results) @test sum(weights) ≈ 1 @test weights.non_centered > weights.centered end @@ -64,8 +70,8 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end @testset "regularization is respected" begin elpd_results = map(loo, [randn(1000, 4, 2, 3) for _ in 1:2]) - weights_reg = model_weights(PseudoBMA(true), elpd_results) - weights_nonreg = model_weights(PseudoBMA(false), elpd_results) + weights_reg = PosteriorStats.model_weights(PseudoBMA(true), elpd_results) + weights_nonreg = PosteriorStats.model_weights(PseudoBMA(false), elpd_results) @test !(weights_reg ≈ weights_nonreg) end end @@ -79,9 +85,11 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end @testset "number of samples can be configured" begin elpd_results = map(loo, [randn(1000, 4, 2, 3) for _ in 1:2]) rng = MersenneTwister(64) - weights1 = model_weights(BootstrappedPseudoBMA(; rng, samples=10), elpd_results) + weights1 = PosteriorStats.model_weights( + BootstrappedPseudoBMA(; rng, samples=10), elpd_results + ) rng = MersenneTwister(64) - weights2 = model_weights( + weights2 = PosteriorStats.model_weights( BootstrappedPseudoBMA(; rng, samples=100), elpd_results ) @test !(weights1 ≈ weights2) @@ -117,21 +125,16 @@ struct DummyOptimizer <: Optim.AbstractOptimizer end @test_throws ArgumentError Stacking(; optimizer=DummyOptimizer()) end - @testset "stacking is default" begin - elpd_results = map(loo, [randn(1000, 4, 2, 3) for _ in 1:2]) - @test model_weights(elpd_results) == model_weights(Stacking(), elpd_results) - end - test_model_weights(Stacking) @testset "alternate optimizer options are used" begin elpd_results = map(loo, [randn(1000, 4, 2, 3) for _ in 1:10]) - weights1 = model_weights(Stacking(), elpd_results) - weights2 = model_weights(Stacking(), elpd_results) + weights1 = PosteriorStats.model_weights(Stacking(), elpd_results) + weights2 = PosteriorStats.model_weights(Stacking(), elpd_results) optimizer = GradientDescent() - weights3 = model_weights(Stacking(; optimizer), elpd_results) + weights3 = PosteriorStats.model_weights(Stacking(; optimizer), elpd_results) options = Optim.Options(; iterations=2) - weights4 = model_weights(Stacking(; options), elpd_results) + weights4 = PosteriorStats.model_weights(Stacking(; options), elpd_results) @test weights3 != weights1 == weights2 != weights4 @test weights3 ≈ weights1 end