2 changes: 1 addition & 1 deletion .github/workflows/documenter.yml
@@ -16,7 +16,7 @@ jobs:
version: "1.7.29"
- uses: julia-actions/setup-julia@latest
with:
version: "1.12"
version: "1.12.2"
- name: Julia Cache
uses: julia-actions/cache@v2
- name: Cache Quarto
8 changes: 8 additions & 0 deletions docs/src/references.bib
@@ -798,6 +798,14 @@ @article{SakaiIiduka:2021
JOURNAL = {Journal of Optimization Theory and Applications},
PAGES = {130–150}
}
@article{SakaiIiduka:2024,
author = {Sakai, Hiroyuki and Iiduka, Hideaki},
eprint = {2409.00859},
eprinttype = {arXiv},
journal = {preprint},
title = {A general framework of Riemannian adaptive optimization methods with a convergence analysis},
year = {2024},
}
@article{SouzaOliveira:2015,
AUTHOR = {J. C. O. Souza and P. R. Oliveira},
DOI = {10.1007/s10898-015-0282-7},
12 changes: 10 additions & 2 deletions docs/src/solvers/gradient_descent.md
@@ -20,6 +20,7 @@ GradientDescentState
A field of the options is the `direction`, a [`DirectionUpdateRule`](@ref), which by default ([`IdentityUpdateRule`](@ref)) just evaluates the gradient, but can be enhanced for example to

```@docs
AdaptiveDirection
AverageGradient
DirectionUpdateRule
IdentityUpdateRule
@@ -28,10 +29,17 @@ Nesterov
PreconditionedDirection
```

which internally use the [`ManifoldDefaultsFactory`](@ref) and produce the internal
elements
where the [`AdaptiveDirection`](@ref) can be configured with different adaptive rules

```@docs
BasicDirection
AdamDirection
```

Internally, the direction rules use the [`ManifoldDefaultsFactory`](@ref) and produce the actual (not exported) rules

```@docs
Manopt.AdaptiveDirectionRule
Manopt.AverageGradientRule
Manopt.ConjugateDescentCoefficientRule
Manopt.MomentumGradientRule
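As a quick orientation for reviewers, here is a minimal usage sketch of how the new rule could be plugged into `gradient_descent`. It is not part of this PR's diff; it assumes the `AdaptiveDirection` and `AdamDirection` constructors introduced above, and `identity_precondition` is a hypothetical user-defined preconditioner written only for illustration.

```julia
using Manopt, Manifolds

# a simple test problem on the sphere: minimize half the squared distance to q
M = Sphere(2)
q = [1.0, 0.0, 0.0]
f(M, p) = 0.5 * distance(M, p, q)^2
grad_f(M, p) = -log(M, p, q)

# ψ_k: a trivial preconditioner that returns the direction unchanged (allocating signature)
identity_precondition(M, p, d, X, k) = X

p0 = [0.0, 0.0, 1.0]
p_res = gradient_descent(
    M, f, grad_f, p0;
    direction = AdaptiveDirection(M, AdamDirection(M; β = 0.9), identity_precondition),
)
```

Swapping `AdamDirection(M; β = 0.9)` for `BasicDirection()` should recover plain gradient steps within the same adaptive framework.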
3 changes: 2 additions & 1 deletion src/Manopt.jl
@@ -439,6 +439,7 @@ export AbstractMeshSearchFunction, DefaultMeshAdaptiveDirectSearch
# Direction Update Rules
export DirectionUpdateRule
export Gradient, StochasticGradient
export AdaptiveDirection, BasicDirection, AdamDirection
export AverageGradient, MomentumGradient, Nesterov, PreconditionedDirection
export SteepestDescentCoefficient,
HestenesStiefelCoefficient,
@@ -538,7 +539,7 @@ export SmoothingTechnique, LinearQuadraticHuber, LogarithmicSumOfExponentials
# Stepsize
export Stepsize
export AdaptiveWNGradient, AffineCovariantStepsize, ConstantLength, DecreasingLength,
Polyak, DistanceOverGradients, DistanceOverGradientsStepsize
Polyak, DistanceOverGradients
export ProximalGradientMethodBacktracking
export ArmijoLinesearch, Linesearch, NonmonotoneLinesearch, CubicBracketingLinesearch
export get_stepsize, get_initial_stepsize, get_last_stepsize
220 changes: 220 additions & 0 deletions src/plans/first_order_plan.jl
@@ -603,6 +603,9 @@ function set_iterate!(agst::AbstractGradientSolverState, M, p)
return agst
end

#
#
# Direction Update Rules -------------------------------------------------------------------
"""
DirectionUpdateRule

@@ -1032,6 +1035,223 @@ function PreconditionedDirection(args...; kwargs...)
return ManifoldDefaultsFactory(Manopt.PreconditionedDirectionRule, args...; kwargs...)
end

"""
AdaptiveDirectionRule{E<:AbstractEvaluationType, T} <: DirectionUpdateRule

A direction update rule that follows the proposed general framework of [SakaiIiduka:2024](@cite)
using a memory of previous gradients to adaptively compute a new direction in two steps

1. Apply a direction computation function ``ϕ_k`` to compute a new direction based on the memory of recorded gradients.
2. Apply a precondition function ``ψ_k`` to the computed direction to obtain the final direction.

In the following we use `M` as the manifold, `p` as the current iterate, `d` as an instance of
this direction update rule itself, and `k` as the current iteration number.

# Fields

* `memory_gradients`: circular buffer storing the last ``m`` gradients
* `direction`: a function or functor that maps – depending on `E` – either
* `(M, p, d, k) -> X` allocating a new direction `X`
* `(M, X, p, d, k) -> X` mutating the direction `X`
* `preconditioning`: a function or functor that maps – depending on `E` – either
* `(M, p, d, X, k) -> Y` allocating a new preconditioned direction `Y`
* `(M, Y, p, d, X, k) -> Y` mutating the preconditioned direction `Y`
* `p_memory`: the point whose tangent space the stored gradients currently belong to.
* `X_memory`: internal storage for the most recently computed direction.
* $(_var(:Field, :vector_transport_method))

# Constructors

AdaptiveDirectionRule(
M::AbstractManifold, direction, precondition;
memory_size::Int = 10,
p = rand(M),
X = zero_vector(M, p),
evaluation::E = AllocatingEvaluation(),
vector_transport_method::VTM = default_vector_transport_method(M, typeof(p)),
) where {E <: AbstractEvaluationType}

Add an adaptive direction update rule to a gradient problem, where
* `memory_size` is the number of gradients stored
* `X` provides a prototype tangent vector that determines the type of the stored gradients
* `evaluation` specifies whether the direction and preconditioning functions work in-place or allocate new memory


"""
mutable struct AdaptiveDirectionRule{E <: AbstractEvaluationType, D, PC, P, T, VTM} <: DirectionUpdateRule
memory_gradients::CircularBuffer{T}
direction::D
preconditioning::PC
p_memory::P
X_memory::T
vector_transport_method::VTM
end
function AdaptiveDirectionRule(
M::AbstractManifold,
direction::D,
precondition::PC;
memory_size::Int = 10,
p::P = rand(M),
X::T = zero_vector(M, p),
evaluation::E = AllocatingEvaluation(),
vector_transport_method::VTM = default_vector_transport_method(
M, typeof(p)
),
) where {E <: AbstractEvaluationType, D, PC, P, T, VTM <: AbstractVectorTransportMethod}
gradient_memory = CircularBuffer{T}(memory_size)
return AdaptiveDirectionRule{E, D, PC, P, T, VTM}(gradient_memory, direction, precondition, p, X, vector_transport_method)
end
function (ad::AdaptiveDirectionRule{AllocatingEvaluation})(
mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
M = get_manifold(mp)
p = get_iterate(s)
X = get_gradient(s)
update_adaptive_memory!(ad, mp, s, X)
ad.X_memory = ad.direction(M, p, ad, k) # compute direction
ad.X_memory = ad.preconditioning(M, p, ad, ad.X_memory, k) # precondition direction
return get_stepsize(mp, s, k; gradient = X, η = -ad.X_memory), ad.X_memory
end
function (ad::AdaptiveDirectionRule{InplaceEvaluation})(
mp::AbstractManoptProblem, s::AbstractGradientSolverState, k
)
M = get_manifold(mp)
p = get_iterate(s)
X = get_gradient(s)
update_adaptive_memory!(ad, mp, s, X)
ad.direction(M, ad.X_memory, p, ad, k) # compute direction
ad.preconditioning(M, ad.X_memory, p, ad, ad.X_memory, k) # precondition direction
return get_stepsize(mp, s, k; gradient = X, η = -ad.X_memory), ad.X_memory
end

#
# Both previous functions update the inner memory:
# 1. transport all old gradients to the new tangent space
# 2. add the current gradient to the memory (popping the oldest if necessary)
function update_adaptive_memory!(
ad::AdaptiveDirectionRule, mp::AbstractManoptProblem, s::AbstractGradientSolverState, X = get_gradient(s)
)
M = get_manifold(mp)
p = get_iterate(s)
# transport memory to current tangent space
start = length(ad.memory_gradients) == capacity(ad.memory_gradients) ? 2 : 1
for i in start:length(ad.memory_gradients)
# transport all stored tangent vectors in the tangent space of the next iterate
vector_transport_to!(
M, ad.memory_gradients[i], ad.p_memory, ad.memory_gradients[i], p, ad.vector_transport_method
)
end
copyto!(M, ad.p_memory, p)
# if memory is full, pop first - but reuse memory
if isfull(ad.memory_gradients)
Y = popfirst!(ad.memory_gradients)
copyto!(M, Y, p, X)
push!(ad.memory_gradients, Y)
else
# store current gradient
push!(ad.memory_gradients, copy(M, p, X))
end
return ad
end

"""
AdaptiveDirection(direction, precondition; kwargs...)
AdaptiveDirection(M::AbstractManifold, direction, precondition; kwargs...)

1. Apply a direction computation function ``ϕ_k`` to compute a new direction based on the memory of recorded gradients.
2. Apply a precondition function ``ψ_k`` to the computed direction to obtain the final direction.

Note that compared to [SakaiIiduka:2024](@cite), the second step is denoted slightly differently here:
it performs the preconditioning directly and does not (first) return the (inverse of a) preconditioner ``H_k``.

In the following we use `M` as the manifold, `p` as the current iterate, `d` as an instance of
this direction update rule itself, and `k` as the current iteration number.

# Arguments

$(_var(:Argument, :M; type = true)) (optional)
* `direction`: a function or functor that maps either
* `(M, p, d, k) -> X` allocating a new direction `X`
* `(M, X, p, d, k) -> X` mutating the direction `X`
* `precondition`: a function or functor that maps either
* `(M, p, d, X, k) -> Y` allocating a new preconditioned direction `Y`
* `(M, Y, p, d, X, k) -> Y` mutating the preconditioned direction `Y`

# Keyword arguments

$(_var(:Keyword, :evaluation))

$(_note(:ManifoldDefaultFactory, "AdaptiveDirectionRule"))
"""
function AdaptiveDirection(M::AbstractManifold, direction, precondition; kwargs...)
return ManifoldDefaultsFactory(
Manopt.AdaptiveDirectionRule, M, direction, precondition; kwargs...,
)
end

"""
BasicDirection

A simple direction function for [`AdaptiveDirectionRule`](@ref) that just returns
the last stored gradient from the memory, i.e.

```math
ϕ_k(g_1,…,g_m) = g_m
```
"""
struct BasicDirection end
function (bd::BasicDirection)(M::AbstractManifold, p, ad::AdaptiveDirectionRule, k)
return ad.memory_gradients[end]
end
function (bd::BasicDirection)(M::AbstractManifold, X, p, ad::AdaptiveDirectionRule, k)
return copyto!(M, X, p, ad.memory_gradients[end])
end

"""
AdamDirection

A direction function for [`AdaptiveDirectionRule`](@ref) that implements the ADAM rule adapted to Riemannian manifolds as

```math
m_k = β m_{k-1} + (1-β) g_k,
$(_tex(:qquad))
ϕ_k(g_1,…,g_m) = $(_tex(:frac, "m_k", "1-β^k"))
```

where ``g_k`` is the current gradient, ``m_k`` is the first moment estimate, and ``β ∈ (0,1)`` is a decay rate.

# Fields

* `β::Real`: decay rate for the first moment estimate
* `m::T`: storage for the first moment estimate

# Constructors

AdamDirection(M::AbstractManifold; β=0.9, p=rand(M), X=zero_vector(M,p))
"""
struct AdamDirection{R <: Real, T}
β::R
m::T
end
function AdamDirection(
M::AbstractManifold;
β::F = 0.9,
p::P = rand(M),
X::T = zero_vector(M, p),
) where {F <: Real, P, T}
return AdamDirection{F, T}(β, copy(M, p, X))
end
function (ad::AdamDirection)(M::AbstractManifold, p, adr::AdaptiveDirectionRule, k)
g = adr.memory_gradients[end]
copyto!(M, ad.m, p, (1 - ad.β) * g + ad.β * ad.m)
return (1 / (1 - ad.β^k)) * ad.m
end
function (ad::AdamDirection)(M::AbstractManifold, X, p, adr::AdaptiveDirectionRule, k)
g = adr.memory_gradients[end]
copyto!(M, ad.m, p, (1 - ad.β) * g + ad.β * ad.m)
copyto!(M, X, p, (1 / (1 - ad.β^k)) * ad.m)
return X
end

"""
AbstractRestartCondition

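To make the functor interface of the two steps ``ϕ_k`` (direction) and ``ψ_k`` (preconditioning) more concrete, here is a small self-contained sketch of a user-defined pair in the allocating convention documented above. It is illustrative only and not part of the PR; the names `avg_direction` and `scale_precondition` are made up for this example.

```julia
using Manopt, Manifolds

M = Euclidean(2)

# ϕ_k: average all gradients currently stored in the memory of the rule `d`
avg_direction(M, p, d, k) = sum(d.memory_gradients) / length(d.memory_gradients)

# ψ_k: rescale the computed direction X by its norm, a (very) simple preconditioning
scale_precondition(M, p, d, X, k) = X / (1 + norm(M, p, X))

# construct the internal rule directly; the AdaptiveDirection factory wraps the same call
rule = Manopt.AdaptiveDirectionRule(
    M, avg_direction, scale_precondition;
    memory_size = 5, p = zeros(2),
)
```

With `evaluation = InplaceEvaluation()` the same two functions would instead be written with the result as their second argument, that is `(M, X, p, d, k)` and `(M, Y, p, d, X, k)`.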
14 changes: 5 additions & 9 deletions src/plans/quasi_newton_plan.jl
@@ -582,13 +582,9 @@ $(_var(:Field, :vector_transport_method))
[`AbstractQuasiNewtonDirectionUpdate`](@ref)
"""
mutable struct QuasiNewtonLimitedMemoryDirectionUpdate{
NT <: AbstractQuasiNewtonUpdateRule,
T,
F,
V <: AbstractVector{F},
G <: Union{F, Nothing},
VT <: AbstractVectorTransportMethod,
Proj,
NT <: AbstractQuasiNewtonUpdateRule, T, F,
V <: AbstractVector{F}, G <: Union{F, Nothing},
VT <: AbstractVectorTransportMethod, Proj,
} <: AbstractQuasiNewtonDirectionUpdate
memory_s::CircularBuffer{T}
memory_y::CircularBuffer{T}
@@ -754,8 +750,8 @@ taking into account that the corresponding step size is chosen.

# Provided functors

* `(mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction
* `(η, mp::AbstractManoptproblem, st::QuasiNewtonState) -> η` to compute the update direction in-place of `η`
* `(mp::AbstractManoptProblem, st::QuasiNewtonState) -> η` to compute the update direction
* `(η, mp::AbstractManoptProblem, st::QuasiNewtonState) -> η` to compute the update direction in-place of `η`

# Fields

10 changes: 5 additions & 5 deletions test/plans/test_stepsize.jl
@@ -263,7 +263,7 @@ end
dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
p = [2.0, 2.0]
gds = GradientDescentState(M; p = p)
ds = DistanceOverGradientsStepsize(
ds = Manopt.DistanceOverGradientsStepsize(
M; p = p, initial_distance = 1.0, use_curvature = false
)
@test ds.gradient_sum == 0
Expand Down Expand Up @@ -295,7 +295,7 @@ end
dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
p = [2.0, 2.0]
gds = GradientDescentState(M; p = p)
ds = DistanceOverGradientsStepsize(
ds = Manopt.DistanceOverGradientsStepsize(
M;
p = p,
initial_distance = 1.0,
@@ -318,7 +318,7 @@ end
dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
p = [1, 0]
gds = GradientDescentState(M; p = p)
ds = DistanceOverGradientsStepsize(
ds = Manopt.DistanceOverGradientsStepsize(
M; p = p, initial_distance = 1.0, use_curvature = false
)
@test ds.gradient_sum == 0
@@ -337,7 +337,7 @@ end
dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
p = [1, 0]
gds = GradientDescentState(M; p = p)
ds = DistanceOverGradientsStepsize(
ds = Manopt.DistanceOverGradientsStepsize(
M;
p = p,
initial_distance = 1.0,
@@ -367,7 +367,7 @@ end

dmp = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f))
gds = GradientDescentState(M; p = p)
ds = DistanceOverGradientsStepsize(
ds = Manopt.DistanceOverGradientsStepsize(
M;
p = p,
initial_distance = 1.0,