Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.5.9] unreleased

### Added

* add a `PreconditionedDirection` variant to the `direction` gradient processor
  keyword argument and its corresponding `PreconditionedDirectionRule`
* make the preconditioner available in quasi Newton.
* in `gradient_descent` and `conjugate_gradient_descent` the rule can be added as well.

### Fixed

* the links in the AD tutorial are fixed and moved to using `extref`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Manopt"
uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
version = "0.5.8"
version = "0.5.9"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Expand Down
2 changes: 2 additions & 0 deletions docs/src/solvers/gradient_descent.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ DirectionUpdateRule
IdentityUpdateRule
MomentumGradient
Nesterov
PreconditionedDirection
```

which internally use the [`ManifoldDefaultsFactory`](@ref) and produce the internal
Expand All @@ -35,6 +36,7 @@ Manopt.AverageGradientRule
Manopt.ConjugateDescentCoefficientRule
Manopt.MomentumGradientRule
Manopt.NesterovRule
Manopt.PreconditionedDirectionRule
```

## Debug actions
Expand Down
3 changes: 2 additions & 1 deletion src/Manopt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,8 @@ export AbstractMeshSearchFunction, DefaultMeshAdaptiveDirectSearch
#
# Direction Update Rules
export DirectionUpdateRule
export Gradient, StochasticGradient, AverageGradient, MomentumGradient, Nesterov
export Gradient, StochasticGradient
export AverageGradient, MomentumGradient, Nesterov, PreconditionedDirection
export SteepestDescentCoefficient,
HestenesStiefelCoefficient,
FletcherReevesCoefficient,
Expand Down
109 changes: 104 additions & 5 deletions src/plans/gradient_plan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ $(_var(:Keyword, :vector_transport_method))
Add average to a gradient problem, where

* `n`: determines the size of averaging
* `direction`: is the internal [`DirectionUpdateRule`](@ref) to determine the gradients to store
* `direction`: is the internal [`DirectionUpdateRule`](@ref) to determine the gradients to store
* `gradients`: can be pre-filled with some history
* `last_iterate`: stores the last iterate
$(_var(:Keyword, :vector_transport_method))
Expand Down Expand Up @@ -457,7 +457,7 @@ them to the current iterates tangent space.

# Input

* `M` (optional)
$(_var(:Argument, :M; type=true)) (optional)

# Keyword arguments

Expand Down Expand Up @@ -570,13 +570,13 @@ Then the direction from ``p_k`` to ``p_k+1`` by ``d = $(_tex(:invretr))_{p_k}p_{

# Input

* `M` (optional)
$(_var(:Argument, :M; type=true)) (optional)

# Keyword arguments

$(_var(:Keyword, :p; add=:as_Initial))
* `γ=0.001``
* `μ=0.9``
* `γ=0.001`
* `μ=0.9`
* `shrinkage = k -> 0.8`
$(_var(:Keyword, :inverse_retraction_method))

Expand All @@ -586,6 +586,105 @@ function Nesterov(args...; kwargs...)
return ManifoldDefaultsFactory(Manopt.NesterovRule, args...; kwargs...)
end

"""
PreconditionedDirectionRule{E<:AbstractEvaluationType} <: DirectionUpdateRule

Add a preconditioning as gradient processor, see [`PreconditionedDirection`](@ref)
for more mathematical background.

# Fields

* `direction`: internal [`DirectionUpdateRule`](@ref) to determine directions to apply the preconditioning to
* `preconditioner`: the preconditioner function

# Constructors

PreconditionedDirectionRule(
M::AbstractManifold,
preconditioner;
direction::Union{<:DirectionUpdateRule,ManifoldDefaultsFactory}=IdentityUpdateRule(),
evaluation::AbstractEvaluationType=AllocatingEvaluation()
)

Add preconditioning to a gradient problem.

# Input

$(_var(:Argument, :M; type=true))
* `preconditioner`: preconditioner function, either as a `(M, p, X) -> Y` allocating or `(M, Y, p, X) -> Y` mutating function

# Keyword arguments

$(_var(:Keyword, :evaluation))
* `direction=`[`IdentityUpdateRule`](@ref) internal [`DirectionUpdateRule`](@ref) to determine the gradients to store or a [`ManifoldDefaultsFactory`](@ref) generating one
"""
mutable struct PreconditionedDirectionRule{
    E<:AbstractEvaluationType,D<:DirectionUpdateRule,F
} <: DirectionUpdateRule
    preconditioner::F # preconditioner function: `(M, p, X) -> Y` (allocating) or `(M, Y, p, X) -> Y` (mutating), cf. the docstring above
    direction::D # inner `DirectionUpdateRule` whose resulting direction is preconditioned
end
function PreconditionedDirectionRule(
    M::AbstractManifold,
    preconditioner::F;
    direction::Union{<:DirectionUpdateRule,ManifoldDefaultsFactory}=Gradient(),
    evaluation::E=AllocatingEvaluation(),
) where {E<:AbstractEvaluationType,F}
    # A factory has to be resolved on the concrete manifold first; a plain
    # `DirectionUpdateRule` passes through `_produce_type` unchanged.
    inner_direction = _produce_type(direction, M)
    return PreconditionedDirectionRule{E,typeof(inner_direction),F}(
        preconditioner, inner_direction
    )
end
function (pg::PreconditionedDirectionRule{AllocatingEvaluation})(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    M = get_manifold(mp)
    p = get_iterate(s)
    # First let the inner rule produce the step size and the raw direction,
    step, dir = pg.direction(mp, s, k)
    # then precondition that direction (allocating a new tangent vector)
    # and store the result as the current gradient of the state.
    Y = pg.preconditioner(M, p, dir)
    set_gradient!(s, M, p, Y)
    return step, get_gradient(s)
end
function (pg::PreconditionedDirectionRule{InplaceEvaluation})(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    M = get_manifold(mp)
    p = get_iterate(s)
    # obtain the inner rule's step size and direction
    step, dir = pg.direction(mp, s, k)
    # precondition in-place: `dir` serves as both output `Y` and input `X`.
    # NOTE(review): this assumes the mutating preconditioner tolerates aliased
    # arguments — confirm for preconditioners that are not elementwise.
    pg.preconditioner(M, dir, p, dir)
    return step, dir
end

"""
PreconditionedDirection(preconditioner; kwargs...)
PreconditionedDirection(M::AbstractManifold, preconditioner; kwargs...)

Add a preconditioner to a gradient processor following the [motivation for optimization](https://en.wikipedia.org/wiki/Preconditioner#Preconditioning_in_optimization),
as a linear invertible map ``P: $(_math(:TpM)) → $(_math(:TpM))`` that usually should be

* symmetric: ``⟨X, P(Y)⟩ = ⟨P(X), Y⟩``
* positive definite ``⟨X, P(X)⟩ > 0`` for ``X`` not the zero-vector

The gradient is then preconditioned as ``P(X)``, where ``X`` is either the
gradient of the objective or the result of a previous (internally stored) gradient processor.

For example if you provide as the preconditioner the inverse of the Hessian ``$(_tex(:Hess))^{-1} f``,
you turn a gradient descent into a Newton method.

# Arguments

$(_var(:Argument, :M; type=true)) (optional)
* `preconditioner`: preconditioner function, either as a `(M, p, X) -> Y` allocating or `(M, Y, p, X) -> Y` mutating function

# Keyword arguments

* `direction=`[`IdentityUpdateRule`](@ref) internal [`DirectionUpdateRule`](@ref) to determine the gradients to store or a [`ManifoldDefaultsFactory`](@ref) generating one
$(_var(:Keyword, :evaluation))

$(_note(:ManifoldDefaultFactory, "PreconditionedDirectionRule"))
"""
function PreconditionedDirection(args...; kwargs...)
    # Wrap the rule in a factory so the manifold can be supplied later by the solver.
    return ManifoldDefaultsFactory(Manopt.PreconditionedDirectionRule, args...; kwargs...)
end

@doc raw"""
DebugGradient <: DebugAction

Expand Down
4 changes: 3 additions & 1 deletion src/solvers/gradient_descent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ function GradientDescentState(
p, X, direction, stepsize, stopping_criterion, retraction_method
)
end
function (r::IdentityUpdateRule)(mp::AbstractManoptProblem, s::GradientDescentState, k)
function (r::IdentityUpdateRule)(
    mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
    # The identity processor: return the current step size together with the
    # gradient at the current iterate, updated in-place into `s.X`.
    step = get_stepsize(mp, s, k)
    grad = get_gradient!(mp, s.X, s.p)
    return step, grad
end
function default_stepsize(
Expand Down
29 changes: 26 additions & 3 deletions src/solvers/quasi_Newton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ all necessary fields.
* `nondescent_direction_value`: the value from the last inner product from checking for descent directions
$(_var(:Field, :p; add=[:as_Iterate]))
* `p_old`: the last iterate
* `preconditioner`: a [`DirectionUpdateRule`](@ref) to either store the [`IdentityUpdateRule`](@ref) or a [`PreconditionedDirectionRule`](@ref)
* `sk`: the current step
* `yk`: the current gradient difference
$(_var(:Field, :retraction_method))
Expand All @@ -31,6 +32,7 @@ Generate the Quasi Newton state on the manifold `M` with start point `p`.

* `direction_update=`[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)`(M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method)`
$(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(1000)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-6)`"))
* `preconditioner::Union{`[`PreconditionedDirectionRule`](@ref)`, Nothing}` specify a preconditioner
$(_var(:Keyword, :retraction_method))
$(_var(:Keyword, :stepsize; default="[`default_stepsize`](@ref)`(M, QuasiNewtonState)`"))
$(_var(:Keyword, :vector_transport_method))
Expand All @@ -49,6 +51,7 @@ mutable struct QuasiNewtonState{
RTR<:AbstractRetractionMethod,
VT<:AbstractVectorTransportMethod,
R,
TPrecon<:DirectionUpdateRule,
} <: AbstractGradientSolverState
p::P
p_old::P
Expand All @@ -57,6 +60,7 @@ mutable struct QuasiNewtonState{
sk::T
yk::T
direction_update::D
preconditioner::TPrecon
retraction_method::RTR
stepsize::S
stop::SC
Expand All @@ -74,6 +78,7 @@ function QuasiNewtonState(
direction_update::D=QuasiNewtonLimitedMemoryDirectionUpdate(
M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method
),
preconditioner::Union{PreconditionedDirectionRule,ManifoldDefaultsFactory,Nothing}=nothing,
stopping_criterion::SC=StopAfterIteration(1000) | StopWhenGradientNormLess(1e-6),
retraction_method::RM=default_retraction_method(M, typeof(p)),
stepsize::S=default_stepsize(
Expand All @@ -93,14 +98,17 @@ function QuasiNewtonState(
RM<:AbstractRetractionMethod,
VTM<:AbstractVectorTransportMethod,
}
return QuasiNewtonState{P,T,D,SC,S,RM,VTM,Float64}(
n = isnothing(preconditioner)
precon = n ? IdentityUpdateRule() : _produce_type(preconditioner, M)
return QuasiNewtonState{P,T,D,SC,S,RM,VTM,Float64,typeof(precon)}(
p,
copy(M, p),
copy(M, p, X),
X,
copy(M, p, X),
copy(M, p, X),
direction_update,
precon,
retraction_method,
stepsize,
stopping_criterion,
Expand Down Expand Up @@ -217,6 +225,11 @@ $(_var(:Keyword, :evaluation; add=:GradientExample))
* `:reinitialize_direction_update`: discards operator state stored in direction update rules.
* any other value performs the verification, keeps the direction but stores a message.
A stored message can be displayed using [`DebugMessages`](@ref).
* `preconditioner=nothing` specify a preconditioner, either
* the default `nothing` does not activate a preconditioning
* a function of the form `(M, p, X) -> Y` or mutating `(M, Y, p, X) -> Y` depending on the `evaluation`
  * a [`PreconditionedDirection`](@ref). See also its docs for more details on the preconditioner.
  Note that the preconditioner is applied to the gradient, i.e. the right hand side _before_ solving the linear system
* `project!=copyto!`: for numerical stability it is possible to project onto the tangent space after every iteration.
the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory.
$(_var(:Keyword, :retraction_method))
Expand Down Expand Up @@ -288,6 +301,7 @@ function quasi_Newton!(
end
),
initial_scale::Real=1.0,
preconditioner=nothing,
stepsize::Union{Stepsize,ManifoldDefaultsFactory}=default_stepsize(
M,
QuasiNewtonState;
Expand All @@ -297,7 +311,10 @@ function quasi_Newton!(
stopping_criterion::StoppingCriterion=StopAfterIteration(max(1000, memory_size)) |
StopWhenGradientNormLess(1e-6),
kwargs...,
) where {O<:Union{AbstractManifoldGradientObjective,AbstractDecoratedManifoldObjective}}
) where {
E<:AbstractEvaluationType,
O<:Union{AbstractManifoldGradientObjective{E},AbstractDecoratedManifoldObjective{E}},
}
if memory_size >= 0
local_dir_upd = QuasiNewtonLimitedMemoryDirectionUpdate(
M,
Expand Down Expand Up @@ -331,6 +348,11 @@ function quasi_Newton!(
initial_vector=get_gradient(mp, p),
direction_update=local_dir_upd,
stopping_criterion=stopping_criterion,
preconditioner=if preconditioner isa Function
PreconditionedDirectionRule(M, preconditioner; evaluation=E())
else
preconditioner
end,
stepsize=_produce_type(stepsize, M),
retraction_method=retraction_method,
vector_transport_method=vector_transport_method,
Expand All @@ -349,7 +371,8 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState)
end
function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, k)
M = get_manifold(mp)
get_gradient!(mp, qns.X, qns.p)
# get gradient – eventually via preconditioning.
qns.preconditioner(mp, qns, k)
qns.direction_update(qns.η, mp, qns)
if !(qns.nondescent_direction_behavior === :ignore)
qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X))
Expand Down
24 changes: 23 additions & 1 deletion test/solvers/test_gradient_descent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,31 @@ using ManifoldDiff: grad_distance
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=Nesterov(M; p=copy(M, data[1])),
direction=Nesterov(; p=copy(M, data[1])),
)
@test isapprox(M, p, p6; atol=1e-13)
# Precon in simple scale down by 2
p7 = gradient_descent(
M,
f,
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=PreconditionedDirection((M, p, X) -> 0.5 .* X),
)
@test isapprox(M, p, p7; atol=1e-13)
# Precon in simple scale down by 2 – inplace
p8 = gradient_descent(
M,
f,
grad_f,
data[1];
stopping_criterion=StopAfterIteration(1000) | StopWhenChangeLess(M, 1e-16),
direction=PreconditionedDirection(
(M, Y, p, X) -> (Y .= 0.5 .* X); evaluation=InplaceEvaluation()
),
)
@test isapprox(M, p, p8; atol=1e-13)
M2 = Euclidean()
@test_logs (
:warn,
Expand Down
9 changes: 8 additions & 1 deletion test/solvers/test_quasi_Newton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ end
)
@test isapprox(M, x_lrbfgs2, x_lrbfgs)

# A simple precon
x_lrbfgs = quasi_Newton(
M, f, grad_f, x; memory_size=-1, preconditioner=(M, p, X) -> 0.5 .* X
)
@test isapprox(M, x_lrbfgs, x_solution; atol=rayleigh_atol)

x_clrbfgs = quasi_Newton(M, f, grad_f, x; cautious_update=true)
@test isapprox(M, x_clrbfgs, x_solution; atol=rayleigh_atol)

Expand Down Expand Up @@ -401,7 +407,8 @@ end
M = Euclidean(2)
p = [0.0, 1.0]
f(M, p) = sum(p .^ 2)
grad_f(M, p) = 2 * sum(p)
# A wrong gradient
grad_f(M, p) = -2 .* p
gmp = ManifoldGradientObjective(f, grad_f)
mp = DefaultManoptProblem(M, gmp)
qns = QuasiNewtonState(
Expand Down
Loading