Skip to content

Commit 33d325a

Browse files
committed
Add comprehensive GPU solver test suite
17 tests verifying gradient_descent and conjugate_gradient_descent work transparently with CuArray-backed manifold points. No ManoptCUDAExt needed — the _produce_type fix from #577 handles GPU allocation natively. Tests cover: ConstantLength/ArmijoLinesearch stepsizes, Float32/Float64, matrix Euclidean, Sphere, recording with :Cost, CPU-vs-GPU equivalence. All tests use known closed-form solutions for verification.
1 parent 2bd767a commit 33d325a

4 files changed

Lines changed: 175 additions & 0 deletions

File tree

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
* A clarification on the use of AI in the [CONTRIBUTING.md](https://manoptjl.org/stable/contributing/) (#573)
1414
* `_produce_type` now accepts the point `p` as an optional third argument, which can be used to produce objects with specific point type for internal buffers. The addition has been utilized in `DirectionUpdateRule`s and `Stepsize`s to improve GPU and custom floating point type compatibility. (#577)
1515
* Added another package and paper using `Manopt.jl` to the about page (#576).
16+
* Added GPU/CUDA test suite (`test/test_cuda_ext.jl`) verifying that solvers (`gradient_descent`, `conjugate_gradient_descent`) work transparently with `CuArray`-backed manifold points. Tests cover `ConstantLength`, `ArmijoLinesearch` stepsizes, Float32/Float64, recording, Sphere, and CPU-vs-GPU equivalence. No solver extension is needed — the `_produce_type` fix from #577 handles GPU allocation natively.
1617

1718
### Fixed
1819

test/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
[deps]
22
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
3+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
34
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
45
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
56
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
@@ -22,6 +23,7 @@ Manopt = {path = ".."}
2223

2324
[compat]
2425
Aqua = "0.8"
26+
CUDA = "5"
2527
Dates = "1.10"
2628
ForwardDiff = "0.10, 1"
2729
JuMP = "1.15"

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ using Manifolds, ManifoldsBase, Manopt, Test
7272
include("solvers/test_trust_regions.jl")
7373
include("solvers/test_vectorbundle_newton.jl")
7474
end
75+
include("test_cuda_ext.jl")
7576
include("MOI_wrapper.jl")
7677
include("test_aqua.jl")
7778
include("test_deprecated.jl")

test/test_cuda_ext.jl

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
#
# GPU solver tests: verify that `gradient_descent` and
# `conjugate_gradient_descent` work transparently with `CuArray`-backed
# manifold points. Every problem has a known closed-form solution used
# for verification. If CUDA is not functional (no GPU), tests are skipped.
#
using Manopt, Manifolds, ManifoldsBase, Test
using LinearAlgebra
using Random: MersenneTwister

# CUDA availability must be detected at top level: `using` is not allowed
# inside a `@testset` (the macro lowers its body into a non-top-level scope),
# so attempting `using CUDA` within the testset would be an error even on
# machines where the tests are meant to be skipped.
cuda_loaded = false
try
    using CUDA
    # A top-level `try` block is a soft scope; `global` ensures the flag
    # above is updated instead of a shadowing local being created.
    global cuda_loaded = CUDA.functional()
catch
    global cuda_loaded = false
end

@testset "GPU solver tests" begin
    if cuda_loaded
        @testset "GD + ConstantLength on Euclidean" begin
            M = Euclidean(3)
            target_cpu = [1.0, 2.0, 3.0]
            target = CuArray(target_cpu)
            # f(p) = ‖p − target‖²/2 has the unique minimizer p = target.
            f(M, p) = sum((p .- target) .^ 2) / 2
            grad_f(M, p) = p .- target
            p0 = CuArray(zeros(3))

            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(200),
                stepsize=ConstantLength(0.1),
            )
            @test result isa CuArray{Float64}
            @test isapprox(Array(result), target_cpu; atol=1e-6)
        end

        @testset "GD + ArmijoLinesearch on Euclidean" begin
            M = Euclidean(3)
            target_cpu = [1.0, 2.0, 3.0]
            target = CuArray(target_cpu)
            f(M, p) = sum((p .- target) .^ 2) / 2
            grad_f(M, p) = p .- target
            p0 = CuArray(zeros(3))

            # Pass the Armijo linesearch explicitly so this test set keeps
            # covering it even if the solver's default stepsize changes.
            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(50),
                stepsize=ArmijoLinesearch(),
            )
            @test result isa CuArray{Float64}
            @test isapprox(Array(result), target_cpu; atol=1e-5)
        end

        @testset "GD + ConstantLength Float32" begin
            T = Float32
            M = Euclidean(3)
            target_cpu = T[1.0, 2.0, 3.0]
            target = CuArray(target_cpu)
            f(M, p) = sum((p .- target) .^ 2) / T(2)
            grad_f(M, p) = p .- target
            p0 = CUDA.zeros(T, 3)

            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(200),
                stepsize=ConstantLength(T(0.1)),
            )
            # The element type must be preserved, not promoted to Float64.
            @test result isa CuArray{Float32}
            @test isapprox(Array(result), target_cpu; atol=T(1e-3))
        end

        @testset "GD on matrix-valued Euclidean" begin
            M = Euclidean(3, 3)
            # Seeded RNG keeps the test deterministic across runs.
            target_cpu = randn(MersenneTwister(42), 3, 3)
            target = CuArray(target_cpu)
            f(M, p) = sum((p .- target) .^ 2) / 2
            grad_f(M, p) = p .- target
            p0 = CuArray(zeros(3, 3))

            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(200),
                stepsize=ConstantLength(0.1),
            )
            @test result isa CuArray{Float64,2}
            @test size(result) == (3, 3)
            @test isapprox(Array(result), target_cpu; atol=1e-6)
        end

        @testset "Conjugate GD on Euclidean" begin
            M = Euclidean(5)
            target_cpu = randn(MersenneTwister(43), 5)
            target = CuArray(target_cpu)
            f(M, p) = sum((p .- target) .^ 2) / 2
            grad_f(M, p) = p .- target
            p0 = CuArray(zeros(5))

            result = conjugate_gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(50),
                stepsize=ConstantLength(0.1),
            )
            @test result isa CuArray{Float64}
            @test isapprox(Array(result), target_cpu; atol=1e-3)
        end

        @testset "GD on Sphere" begin
            M = Sphere(2)
            a_cpu = [1.0, 2.0, 3.0]
            # The closest point on the unit sphere to `a` is a/‖a‖.
            known_solution = a_cpu / norm(a_cpu)
            a = CuArray(a_cpu)
            f(M, p) = sum((p .- a) .^ 2) / 2
            # Riemannian gradient: project the Euclidean gradient (p − a)
            # onto the tangent space at p.
            grad_f(M, p) = project(M, p, p .- a)

            s_cpu = [0.0, 0.0, 1.0]
            p0 = CuArray(s_cpu)

            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(100),
                stepsize=ConstantLength(0.1),
            )
            @test result isa CuArray{Float64}
            # The iterate must remain on the sphere (unit norm) …
            @test isapprox(norm(Array(result)), 1.0; atol=1e-10)
            # … and converge to the known minimizer.
            @test isapprox(Array(result), known_solution; atol=1e-4)
        end

        @testset "GD + recording on Euclidean" begin
            M = Euclidean(3)
            target = CuArray([1.0, 2.0, 3.0])
            f(M, p) = sum((p .- target) .^ 2) / 2
            grad_f(M, p) = p .- target
            p0 = CuArray(zeros(3))

            result = gradient_descent(
                M, f, grad_f, p0;
                stopping_criterion=StopAfterIteration(20),
                stepsize=ConstantLength(0.1),
                record=[:Cost],
                return_state=true,
            )
            # One cost value is recorded per iteration.
            rec = get_record(result)
            @test length(rec) == 20
            p_final = get_solver_result(result)
            @test p_final isa CuArray{Float64}
        end

        @testset "CPU vs GPU equivalence" begin
            M = Euclidean(5)
            target_cpu = randn(MersenneTwister(44), 5)
            target_gpu = CuArray(target_cpu)

            f_cpu(M, p) = sum((p .- target_cpu) .^ 2) / 2
            grad_f_cpu(M, p) = p .- target_cpu
            f_gpu(M, p) = sum((p .- target_gpu) .^ 2) / 2
            grad_f_gpu(M, p) = p .- target_gpu

            p0_cpu = zeros(5)
            p0_gpu = CuArray(zeros(5))

            result_cpu = gradient_descent(
                M, f_cpu, grad_f_cpu, p0_cpu;
                stopping_criterion=StopAfterIteration(100),
                stepsize=ConstantLength(0.1),
            )
            result_gpu = gradient_descent(
                M, f_gpu, grad_f_gpu, p0_gpu;
                stopping_criterion=StopAfterIteration(100),
                stepsize=ConstantLength(0.1),
            )
            # The constant-step iteration is fully deterministic, so CPU and
            # GPU runs must agree up to floating-point rounding.
            @test isapprox(Array(result_gpu), result_cpu; atol=1e-10)
        end
    else
        @info "CUDA not functional, skipping GPU solver tests"
    end
end

0 commit comments

Comments
 (0)