Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.5.9] unreleased

### Added

* add a `PreconditionedDirection` variant to the `direction` gradient processor
  keyword argument and its corresponding `PreconditionedDirectionRule`
* make the preconditioner available in quasi Newton.
* in `gradient_descent` and `conjugate_gradient_descent` the rule can be added as well.

### Fixed

* the links in the AD tutorial are fixed and moved to using `extref`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Manopt"
uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
version = "0.5.8"
version = "0.5.9"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Expand Down
2 changes: 2 additions & 0 deletions docs/src/solvers/gradient_descent.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ DirectionUpdateRule
IdentityUpdateRule
MomentumGradient
Nesterov
PreconditionedDirection
```

which internally use the [`ManifoldDefaultsFactory`](@ref) and produce the internal
Expand All @@ -35,6 +36,7 @@ Manopt.AverageGradientRule
Manopt.ConjugateDescentCoefficientRule
Manopt.MomentumGradientRule
Manopt.NesterovRule
Manopt.PreconditionedDirectionRule
```

## Debug actions
Expand Down
3 changes: 2 additions & 1 deletion src/Manopt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,8 @@ export AbstractMeshSearchFunction, DefaultMeshAdaptiveDirectSearch
#
# Direction Update Rules
export DirectionUpdateRule
export Gradient, StochasticGradient, AverageGradient, MomentumGradient, Nesterov
export Gradient, StochasticGradient
export AverageGradient, MomentumGradient, Nesterov, PreconditionedDirection
export SteepestDescentCoefficient,
HestenesStiefelCoefficient,
FletcherReevesCoefficient,
Expand Down
109 changes: 104 additions & 5 deletions src/plans/gradient_plan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ $(_var(:Keyword, :vector_transport_method))
Add average to a gradient problem, where

* `n`: determines the size of averaging
* `direction`: is the internal [`DirectionUpdateRule`](@ref) to determine the gradients to store
* `direction`: is the internal [`DirectionUpdateRule`](@ref) to determine the gradients to store
* `gradients`: can be pre-filled with some history
* `last_iterate`: stores the last iterate
$(_var(:Keyword, :vector_transport_method))
Expand Down Expand Up @@ -457,7 +457,7 @@ them to the current iterates tangent space.

# Input

* `M` (optional)
$(_var(:Argument, :M; type=true)) (optional)

# Keyword arguments

Expand Down Expand Up @@ -570,13 +570,13 @@ Then the direction from ``p_k`` to ``p_k+1`` by ``d = $(_tex(:invretr))_{p_k}p_{

# Input

* `M` (optional)
$(_var(:Argument, :M; type=true)) (optional)

# Keyword arguments

$(_var(:Keyword, :p; add=:as_Initial))
* `γ=0.001``
* `μ=0.9``
* `γ=0.001`
* `μ=0.9`
* `shrinkage = k -> 0.8`
$(_var(:Keyword, :inverse_retraction_method))

Expand All @@ -586,6 +586,105 @@ function Nesterov(args...; kwargs...)
return ManifoldDefaultsFactory(Manopt.NesterovRule, args...; kwargs...)
end

"""
PreconditionedDirectionRule{E<:AbstractEvaluationType} <: DirectionUpdateRule

Add a preconditioning as gradient processor, see [`PreconditionedDirection`](@ref)
for more mathematical background.

# Fields

* `direction`: internal [`DirectionUpdateRule`](@ref) to determine directions to apply the preconditioning to
* `preconditioner`: the preconditioner function

# Constructors

PreconditionedDirectionRule(
M::AbstractManifold,
preconditioner;
direction::Union{<:DirectionUpdateRule,ManifoldDefaultsFactory}=IdentityUpdateRule(),
evaluation::AbstractEvaluationType=AllocatingEvaluation()
)

Add preconditioning to a gradient problem.

# Input

$(_var(:Argument, :M; type=true))
* `preconditioner`: preconditioner function, either as a `(M, p, X) -> Y` allocating or `(M, Y, p, X) -> Y` mutating function

# Keyword arguments

$(_var(:Keyword, :evaluation))
* `direction=`[`IdentityUpdateRule`](@ref) internal [`DirectionUpdateRule`](@ref) to determine the gradients to store or a [`ManifoldDefaultsFactory`](@ref) generating one
"""
mutable struct PreconditionedDirectionRule{
    E<:AbstractEvaluationType,D<:DirectionUpdateRule,F
} <: DirectionUpdateRule
    preconditioner::F # preconditioner function: `(M, p, X) -> Y` (allocating) or `(M, Y, p, X) -> Y` (mutating), cf. the docstring above
    direction::D # inner `DirectionUpdateRule` whose resulting direction is preconditioned
end
function PreconditionedDirectionRule(
    M::AbstractManifold,
    preconditioner::F;
    direction::Union{<:DirectionUpdateRule,ManifoldDefaultsFactory}=Gradient(),
    evaluation::E=AllocatingEvaluation(),
) where {E<:AbstractEvaluationType,F}
    # A factory has to be resolved on the concrete manifold first; a plain
    # `DirectionUpdateRule` passes through `_produce_type` unchanged.
    inner_direction = _produce_type(direction, M)
    return PreconditionedDirectionRule{E,typeof(inner_direction),F}(
        preconditioner, inner_direction
    )
end
function (pg::PreconditionedDirectionRule{AllocatingEvaluation})(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    M = get_manifold(mp)
    p = get_iterate(s)
    # First let the inner rule produce the step size and the raw direction,
    step, dir = pg.direction(mp, s, k)
    # then precondition that direction (allocating a new tangent vector)
    # and store the result as the current gradient of the state.
    Y = pg.preconditioner(M, p, dir)
    set_gradient!(s, M, p, Y)
    return step, get_gradient(s)
end
function (pg::PreconditionedDirectionRule{InplaceEvaluation})(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    M = get_manifold(mp)
    p = get_iterate(s)
    # obtain the inner rule's step size and direction
    step, dir = pg.direction(mp, s, k)
    # precondition in-place: `dir` serves as both output `Y` and input `X`.
    # NOTE(review): this assumes the mutating preconditioner tolerates aliased
    # arguments — confirm for preconditioners that are not elementwise.
    pg.preconditioner(M, dir, p, dir)
    return step, dir
end

"""
PreconditionedDirection(preconditioner; kwargs...)
PreconditionedDirection(M::AbstractManifold, preconditioner; kwargs...)

Add a preconditioner to a gradient processor following the [motivation for optimization](https://en.wikipedia.org/wiki/Preconditioner#Preconditioning_in_optimization),
as a linear invertible map ``P: $(_math(:TpM)) → $(_math(:TpM))`` that usually should be

* symmetric: ``⟨X, P(Y)⟩ = ⟨P(X), Y⟩``
* positive definite ``⟨X, P(X)⟩ > 0`` for ``X`` not the zero-vector

The gradient is then preconditioned as ``P(X)``, where ``X`` is either the
gradient of the objective or the result of a previous (internally stored) gradient processor.

For example if you provide as the preconditioner the inverse of the Hessian ``$(_tex(:Hess))^{-1} f``,
you turn a gradient descent into a Newton method.

# Arguments

$(_var(:Argument, :M; type=true)) (optional)
* `preconditioner`: preconditioner function, either as a `(M, p, X) -> Y` allocating or `(M, Y, p, X) -> Y` mutating function

# Keyword arguments

* `direction=`[`IdentityUpdateRule`](@ref) internal [`DirectionUpdateRule`](@ref) to determine the gradients to store or a [`ManifoldDefaultsFactory`](@ref) generating one
$(_var(:Keyword, :evaluation))

$(_note(:ManifoldDefaultFactory, "PreconditionedDirectionRule"))
"""
function PreconditionedDirection(args...; kwargs...)
    # Wrap the rule in a factory so the manifold can be supplied later by the solver.
    return ManifoldDefaultsFactory(Manopt.PreconditionedDirectionRule, args...; kwargs...)
end

@doc raw"""
DebugGradient <: DebugAction

Expand Down
4 changes: 3 additions & 1 deletion src/solvers/gradient_descent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ function GradientDescentState(
p, X, direction, stepsize, stopping_criterion, retraction_method
)
end
function (r::IdentityUpdateRule)(mp::AbstractManoptProblem, s::GradientDescentState, k)
function (r::IdentityUpdateRule)(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    # The identity processor: return the current step size together with the
    # gradient at the current iterate, updated in-place into `s.X`.
    step = get_stepsize(mp, s, k)
    grad = get_gradient!(mp, s.X, s.p)
    return step, grad
end
function default_stepsize(
Expand Down
29 changes: 26 additions & 3 deletions src/solvers/quasi_Newton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ all necessary fields.
* `nondescent_direction_value`: the value from the last inner product from checking for descent directions
$(_var(:Field, :p; add=[:as_Iterate]))
* `p_old`: the last iterate
* `preconditioner`: a [`DirectionUpdateRule`](@ref) to either store the [`IdentityUpdateRule`](@ref) or a [`PreconditionedDirectionRule`](@ref)
* `sk`: the current step
* `yk`: the current gradient difference
$(_var(:Field, :retraction_method))
Expand All @@ -31,6 +32,7 @@ Generate the Quasi Newton state on the manifold `M` with start point `p`.

* `direction_update=`[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)`(M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method)`
$(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(1000)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-6)`"))
* `preconditioner::Union{`[`PreconditionedDirectionRule`](@ref)`, Nothing}` specify a preconditioner
$(_var(:Keyword, :retraction_method))
$(_var(:Keyword, :stepsize; default="[`default_stepsize`](@ref)`(M, QuasiNewtonState)`"))
$(_var(:Keyword, :vector_transport_method))
Expand All @@ -49,6 +51,7 @@ mutable struct QuasiNewtonState{
RTR<:AbstractRetractionMethod,
VT<:AbstractVectorTransportMethod,
R,
TPrecon<:DirectionUpdateRule,
} <: AbstractGradientSolverState
p::P
p_old::P
Expand All @@ -57,6 +60,7 @@ mutable struct QuasiNewtonState{
sk::T
yk::T
direction_update::D
preconditioner::TPrecon
retraction_method::RTR
stepsize::S
stop::SC
Expand All @@ -74,6 +78,7 @@ function QuasiNewtonState(
direction_update::D=QuasiNewtonLimitedMemoryDirectionUpdate(
M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method
),
preconditioner::Union{PreconditionedDirectionRule,ManifoldDefaultsFactory,Nothing}=nothing,
stopping_criterion::SC=StopAfterIteration(1000) | StopWhenGradientNormLess(1e-6),
retraction_method::RM=default_retraction_method(M, typeof(p)),
stepsize::S=default_stepsize(
Expand All @@ -93,14 +98,17 @@ function QuasiNewtonState(
RM<:AbstractRetractionMethod,
VTM<:AbstractVectorTransportMethod,
}
return QuasiNewtonState{P,T,D,SC,S,RM,VTM,Float64}(
n = isnothing(preconditioner)
precon = n ? IdentityUpdateRule() : _produce_type(preconditioner, M)
return QuasiNewtonState{P,T,D,SC,S,RM,VTM,Float64,typeof(precon)}(
p,
copy(M, p),
copy(M, p, X),
X,
copy(M, p, X),
copy(M, p, X),
direction_update,
precon,
retraction_method,
stepsize,
stopping_criterion,
Expand Down Expand Up @@ -217,6 +225,11 @@ $(_var(:Keyword, :evaluation; add=:GradientExample))
* `:reinitialize_direction_update`: discards operator state stored in direction update rules.
* any other value performs the verification, keeps the direction but stores a message.
A stored message can be displayed using [`DebugMessages`](@ref).
* `preconditioner=nothing` specify a preconditioner, either
* the default `nothing` does not activate a preconditioning
* a function of the form `(M, p, X) -> Y` or mutating `(M, Y, p, X) -> Y` depending on the `evaluation`
  * a [`PreconditionedDirection`](@ref). See also its docs for more details on the preconditioner.
  Note that the preconditioner is applied to the gradient, i.e. the right hand side _before_ solving the linear system
* `project!=copyto!`: for numerical stability it is possible to project onto the tangent space after every iteration.
the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory.
$(_var(:Keyword, :retraction_method))
Expand Down Expand Up @@ -288,6 +301,7 @@ function quasi_Newton!(
end
),
initial_scale::Real=1.0,
preconditioner=nothing,
stepsize::Union{Stepsize,ManifoldDefaultsFactory}=default_stepsize(
M,
QuasiNewtonState;
Expand All @@ -297,7 +311,10 @@ function quasi_Newton!(
stopping_criterion::StoppingCriterion=StopAfterIteration(max(1000, memory_size)) |
StopWhenGradientNormLess(1e-6),
kwargs...,
) where {O<:Union{AbstractManifoldGradientObjective,AbstractDecoratedManifoldObjective}}
) where {
E<:AbstractEvaluationType,
O<:Union{AbstractManifoldGradientObjective{E},AbstractDecoratedManifoldObjective{E}},
}
if memory_size >= 0
local_dir_upd = QuasiNewtonLimitedMemoryDirectionUpdate(
M,
Expand Down Expand Up @@ -331,6 +348,11 @@ function quasi_Newton!(
initial_vector=get_gradient(mp, p),
direction_update=local_dir_upd,
stopping_criterion=stopping_criterion,
preconditioner=if preconditioner isa Function
PreconditionedDirectionRule(M, preconditioner; evaluation=E())
else
preconditioner
end,
stepsize=_produce_type(stepsize, M),
retraction_method=retraction_method,
vector_transport_method=vector_transport_method,
Expand All @@ -349,7 +371,8 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState)
end
function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, k)
M = get_manifold(mp)
get_gradient!(mp, qns.X, qns.p)
# get gradient – eventually via preconditioning.
qns.preconditioner(mp, qns, k)
qns.direction_update(qns.η, mp, qns)
if !(qns.nondescent_direction_behavior === :ignore)
qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X))
Expand Down
24 changes: 23 additions & 1 deletion test/solvers/test_gradient_descent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,31 @@ using ManifoldDiff: grad_distance
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=Nesterov(M; p=copy(M, data[1])),
direction=Nesterov(; p=copy(M, data[1])),
)
@test isapprox(M, p, p6; atol=1e-13)
# Precon in simple scale down by 2
p7 = gradient_descent(
M,
f,
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=PreconditionedDirection((M, p, X) -> 0.5 .* X),
)
@test isapprox(M, p, p7; atol=1e-13)
# Precon in simple scale down by 2 – inplace
p8 = gradient_descent(
M,
f,
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=PreconditionedDirection(
(M, Y, p, X) -> (Y .= 0.5 .* X); evaluation=InplaceEvaluation()
),
)
@test isapprox(M, p, p8; atol=1e-13)
M2 = Euclidean()
@test_logs (
:warn,
Expand Down
9 changes: 8 additions & 1 deletion test/solvers/test_quasi_Newton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ end
)
@test isapprox(M, x_lrbfgs2, x_lrbfgs)

# A simple precon
x_lrbfgs = quasi_Newton(
M, f, grad_f, x; memory_size=-1, preconditioner=(M, p, X) -> 0.5 .* X
)
@test isapprox(M, x_lrbfgs, x_solution; atol=rayleigh_atol)

x_clrbfgs = quasi_Newton(M, f, grad_f, x; cautious_update=true)
@test isapprox(M, x_clrbfgs, x_solution; atol=rayleigh_atol)

Expand Down Expand Up @@ -401,7 +407,8 @@ end
M = Euclidean(2)
p = [0.0, 1.0]
f(M, p) = sum(p .^ 2)
grad_f(M, p) = 2 * sum(p)
# A wrong gradient
grad_f(M, p) = -2 .* p
gmp = ManifoldGradientObjective(f, grad_f)
mp = DefaultManoptProblem(M, gmp)
qns = QuasiNewtonState(
Expand Down
Loading