Skip to content

Commit b94b0f1

Browse files
committed
start integration of GCP into quasi_Newton
1 parent 82ac90a commit b94b0f1

6 files changed

Lines changed: 94 additions & 11 deletions

File tree

docs/src/references.bib

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,20 @@ @book{Boumal:2023
260260
ISBN = {978-1-00-916616-4}
261261
}
262262

263+
@article{ByrdNocedalSchnabel:1994,
264+
title = {Representations of quasi-{Newton} matrices and their use in limited memory methods},
265+
volume = {63},
266+
issn = {1436-4646},
267+
doi = {10.1007/BF01582063},
268+
number = {1},
269+
urldate = {2025-09-06},
270+
journal = {Mathematical Programming},
271+
author = {Byrd, Richard H. and Nocedal, Jorge and Schnabel, Robert B.},
272+
month = jan,
273+
year = {1994},
274+
pages = {129--156},
275+
}
276+
263277
@article{ByrdLuNocedalZhu:1995,
264278
title = {A {Limited} {Memory} {Algorithm} for {Bound} {Constrained} {Optimization}},
265279
volume = {16},

src/helpers/LineSearchesTypes.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ Wrapper for line searches available in the `LineSearches.jl` library.
1414
1515
Wrap `linesearch` (for example [`HagerZhang`](https://julianlsolvers.github.io/LineSearches.jl/latest/reference/linesearch.html#LineSearches.HagerZhang)
1616
or [`MoreThuente`](https://julianlsolvers.github.io/LineSearches.jl/latest/reference/linesearch.html#LineSearches.MoreThuente)).
17-
The initial step selection from Linesearches.jl is not yet supported and the value 1.0 is used.
17+
The initial step selection from Linesearches.jl is not yet supported and `initial_guess` is
18+
always used (by default [`ConstantInitialGuess`](@ref)).
1819
1920
# Keyword Arguments
2021

src/plans/box_plan.jl

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,24 @@ with a standard manifold. Otherwise return `false`.
77
requires_gcp(::AbstractManifold) = false
88
requires_gcp(M::ProductManifold) = requires_gcp(M.manifolds[1])
99

10+
@doc raw"""
11+
mutable struct LimitedMemoryHessianApproximation end
12+
13+
An approximation of Hessian of a scalar function of the form ``B_0 = θ I``,
14+
``B_{k+1} = B_k - W_k M_k W_k^{\mathrm{T}}``,
15+
where ``\theta > 0`` is an initial scaling guess.
16+
Matrix ``M_k = \begin{psmallmatrix}M_{11} & M_{21}^{\mathrm{T}}\\ M_{21} & M_{22}\end{psmallmatrix}``
17+
is stored using its blocks.
18+
Blocks ``W_k`` are (implicitly) composed from `memory_y` and `memory_s`.
19+
20+
Initial scale ``\theta`` is stored in the field `initial_scale` but if the memory isn't empty,
21+
the current scale is set to squared norm of $s_k$ divided by inner product of ``s_k`` and ``y_k``
22+
where ``k`` is the oldest index for which the denominator is not equal to 0.
23+
24+
See [ByrdNocedalSchnabel:1994](@cite) for details.
25+
"""
1026
mutable struct QuasiNewtonLimitedMemoryBoxDirectionUpdate{
1127
TDU <: QuasiNewtonLimitedMemoryDirectionUpdate,
12-
TM <: AbstractManifold,
1328
F <: Real,
1429
T_HM <: AbstractMatrix,
1530
V <: AbstractVector,
@@ -22,18 +37,60 @@ mutable struct QuasiNewtonLimitedMemoryBoxDirectionUpdate{
2237
M_11::T_HM
2338
M_21::T_HM
2439
M_22::T_HM
25-
# buffer for calculating stuff
40+
# buffer for calculating W_k blocks
2641
coords_Sk_X::V
2742
coords_Sk_Y::V
2843
coords_Yk_X::V
2944
coords_Yk_Y::V
3045
end
3146

47+
function QuasiNewtonLimitedMemoryBoxDirectionUpdate(
48+
qn_du::QuasiNewtonLimitedMemoryDirectionUpdate{<:AbstractQuasiNewtonUpdateRule, T, F}
49+
) where {T, F <: Real}
50+
memory_size = capacity(qn_du.memory_s)
51+
M_11 = zeros(F, memory_size, memory_size)
52+
M_21 = zeros(F, memory_size, memory_size)
53+
M_22 = zeros(F, memory_size, memory_size)
54+
coords_Sk_X = zeros(F, memory_size)
55+
coords_Sk_Y = zeros(F, memory_size)
56+
coords_Yk_X = zeros(F, memory_size)
57+
coords_Yk_Y = zeros(F, memory_size)
58+
return QuasiNewtonLimitedMemoryBoxDirectionUpdate{
59+
typeof(qn_du), F, typeof(M_11), typeof(coords_Sk_X),
60+
}(
61+
qn_du,
62+
qn_du.initial_scale,
63+
M_11,
64+
M_21,
65+
M_22,
66+
coords_Sk_X,
67+
coords_Sk_Y,
68+
coords_Yk_X,
69+
coords_Yk_Y,
70+
)
71+
end
72+
3273
function initialize_update!(ha::QuasiNewtonLimitedMemoryBoxDirectionUpdate)
3374
initialize_update!(ha.qn_du)
3475
return ha
3576
end
3677

78+
function (d::QuasiNewtonLimitedMemoryBoxDirectionUpdate)(
79+
mp::AbstractManoptProblem, st
80+
)
81+
r = zero_vector(get_manifold(mp), get_iterate(st))
82+
return d(r, mp, st)
83+
end
84+
function (d::QuasiNewtonLimitedMemoryBoxDirectionUpdate)(
85+
r, mp::AbstractManoptProblem, st
86+
)
87+
d.qn_du(r, mp, st)
88+
# TODO find_gcp_direction!
89+
return r
90+
end
91+
92+
get_update_vector_transport(u::QuasiNewtonLimitedMemoryBoxDirectionUpdate) = get_update_vector_transport(u.qn_du)
93+
3794
function get_at_bound_index(M::ProductManifold, X, b)
3895
return get_at_bound_index(M.manifolds[1], submanifold_component(M, X, Val(1)), b)
3996
end
@@ -306,8 +363,8 @@ function (::GenericFPFPPUpdater)(M::AbstractManifold, p, old_f_prime, old_f_doub
306363
end
307364

308365
function init_updater!(M::AbstractManifold, fpfpp_upd::LimitedMemoryFPFPPUpdater, d, ha::QuasiNewtonLimitedMemoryBoxDirectionUpdate)
309-
fpfpp_upd.c_s .= 0
310-
fpfpp_upd.c_y .= 0
366+
fill!(fpfpp_upd.c_s, 0)
367+
fill!(fpfpp_upd.c_y, 0)
311368
ii = 1
312369
for i in eachindex(ha.ρ)
313370
if iszero(ha.ρ[i])
@@ -420,7 +477,7 @@ function GCPFinder(
420477
end
421478

422479
"""
423-
find_gcp!(gcp::GCPFinder, d_out, p, d, X)
480+
find_gcp_direction!(gcp::GCPFinder, d_out, p, d, X)
424481
425482
Find generalized Cauchy point looking from point `p` in direction `d` and save the tangent
426483
vector pointing at it to `d_out`. Gradient of the objective at `p` is `X`.

src/plans/stepsize/stepsize.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,19 @@ function (a::ArmijoLinesearchStepsize)(
114114
)
115115
p = get_iterate(s)
116116
grad = isnothing(gradient) ? get_gradient(mp, get_iterate(s)) : gradient
117-
return a(mp, p, grad, η; initial_guess = a.initial_guess(mp, s, k, a.last_stepsize, η))
117+
return a(mp, p, grad, η; initial_guess = a.initial_guess(mp, s, k, a.last_stepsize, η), kwargs...)
118118
end
119119
function (a::ArmijoLinesearchStepsize)(
120-
mp::AbstractManoptProblem, p, X, η; initial_guess = 1.0, kwargs...
120+
mp::AbstractManoptProblem, p, X, η; initial_guess::Real = 1.0, kwargs...
121121
)
122122
reset_messages!(a.messages)
123123
l = norm(get_manifold(mp), p, η)
124+
local swse
125+
if :stop_when_stepsize_exceeds in keys(kwargs)
126+
swse = kwargs.stop_when_stepsize_exceeds
127+
else
128+
swse = (a.stop_when_stepsize_exceeds / l)
129+
end
124130
a.last_stepsize = linesearch_backtrack!(
125131
get_manifold(mp),
126132
a.candidate_point,
@@ -133,7 +139,7 @@ function (a::ArmijoLinesearchStepsize)(
133139
gradient = X,
134140
retraction_method = a.retraction_method,
135141
stop_when_stepsize_less = (a.stop_when_stepsize_less / l),
136-
stop_when_stepsize_exceeds = (a.stop_when_stepsize_exceeds / l),
142+
stop_when_stepsize_exceeds = swse,
137143
stop_increasing_at_step = a.stop_increasing_at_step,
138144
stop_decreasing_at_step = a.stop_decreasing_at_step,
139145
additional_decrease_condition = a.additional_decrease_condition,
@@ -1251,7 +1257,7 @@ mutable struct NonmonotoneLinesearchStepsize{
12511257
retraction_method::TRM = default_retraction_method(M),
12521258
stepsize_reduction::R = 0.5,
12531259
stop_when_stepsize_less::R = 0.0,
1254-
stop_when_stepsize_exceeds = real(max_stepsize(M)),
1260+
stop_when_stepsize_exceeds::R = real(max_stepsize(M)),
12551261
stop_increasing_at_step::I = 100,
12561262
stop_decreasing_at_step::I = 1000,
12571263
storage::Union{Nothing, StoreStateAction} = StoreStateAction(

src/solvers/quasi_Newton.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ function quasi_Newton!(
339339
O <: Union{AbstractManifoldFirstOrderObjective{E}, AbstractDecoratedManifoldObjective{E}},
340340
}
341341
keywords_accepted(quasi_Newton!; kwargs...)
342+
local local_dir_upd
342343
if memory_size >= 0
343344
local_dir_upd = QuasiNewtonLimitedMemoryDirectionUpdate(
344345
M,
@@ -351,6 +352,9 @@ function quasi_Newton!(
351352
nonpositive_curvature_behavior = nonpositive_curvature_behavior,
352353
sy_tol = sy_tol,
353354
)
355+
if requires_gcp(M)
356+
local_dir_upd = QuasiNewtonLimitedMemoryBoxDirectionUpdate(local_dir_upd)
357+
end
354358
else
355359
local_dir_upd = QuasiNewtonMatrixDirectionUpdate(
356360
M,
@@ -401,6 +405,7 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, k)
401405
M = get_manifold(mp)
402406
get_gradient!(mp, qns.X, qns.p)
403407
qns.direction_update(qns.η, mp, qns)
408+
# current_max_length = get_parameter(qns.direction_update, Val(:max_length))
404409
if !(qns.nondescent_direction_behavior === :ignore)
405410
qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X))
406411
if qns.nondescent_direction_value > 0

test/solvers/test_quasi_Newton_box.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ using LinearAlgebra: I, eigvecs, tr, Diagonal, dot
116116
f(M, p) = sum(p .^ 2)
117117
grad_f(M, p) = 2 .* p
118118
p0 = [0.0, 4.0, 10.0]
119-
p_opt = quasi_Newton(M, f, grad_f, p0)
119+
# p_opt = quasi_Newton(M, f, grad_f, p0)
120120
end
121121

122122
@testset "requires_gcp" begin

0 commit comments

Comments
 (0)