diff --git a/Project.toml b/Project.toml
index 1450621..d0cc928 100644
--- a/Project.toml
+++ b/Project.toml
@@ -21,6 +21,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 ADTypes = "1.11.0"
+CUDA = "5"
 ChainRules = "1"
 ChainRulesCore = "1"
 DifferentiationInterface = "0.6.23, 0.7"
@@ -39,6 +40,7 @@ julia = "1.10"
 
 [extras]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
@@ -58,6 +60,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 test = [
     "Test",
     "ADTypes",
+    "CUDA",
     "ChainRules",
     "ChainRulesCore",
     "DifferentiationInterface",
diff --git a/test/runtests.jl b/test/runtests.jl
index 6927ed2..ff5864e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,4 +18,5 @@ using RecursiveArrayTools
     include("test_proximal_maps.jl")
     include("test_subgradients.jl")
     include("test_jacobians.jl")
+    include("test_cuda_ext.jl")
 end
diff --git a/test/test_cuda_ext.jl b/test/test_cuda_ext.jl
new file mode 100644
index 0000000..a6ef175
--- /dev/null
+++ b/test/test_cuda_ext.jl
@@ -0,0 +1,133 @@
+using ManifoldDiff
+using Manifolds
+using ManifoldsBase
+using LinearAlgebra
+using Test
+
+# GPU checks for `ManifoldDiff.gradient` on `CuArray` points.
+#
+# CUDA and Zygote are loaded on a best-effort basis: if either cannot be loaded,
+# or CUDA reports that it is not functional, the tests are skipped with a log
+# message instead of failing the suite.
+@testset "ManifoldDiff GPU AD gradients" begin
+    cuda_loaded = false
+    try
+        using CUDA
+        cuda_loaded = CUDA.functional()
+    catch err
+        # Still skip, but record why CUDA is unavailable instead of hiding it.
+        cuda_loaded = false
+        @info "Loading CUDA failed" exception = (err, catch_backtrace())
+    end
+
+    if cuda_loaded
+        # NOTE(review): looks redundant with the `using CUDA` above; kept so the
+        # loading sequence is unchanged. Confirm before removing.
+        @eval using CUDA
+
+        # Note: ForwardDiff does NOT support CuArrays (scalar indexing in seed!).
+        # Only Zygote (reverse-mode) is tested here.
+
+        zygote_loaded = false
+        try
+            using Zygote
+            using ADTypes: AutoZygote
+            zygote_loaded = true
+        catch err
+            # Still skip, but record why Zygote is unavailable instead of hiding it.
+            zygote_loaded = false
+            @info "Loading Zygote failed" exception = (err, catch_backtrace())
+        end
+
+        if zygote_loaded
+            @eval using Zygote
+            @eval using ADTypes: AutoZygote
+
+            @testset "Euclidean gradient — Zygote Float64" begin
+                M = Euclidean(3)
+                backend = ManifoldDiff.RiemannianProjectionBackend(AutoZygote())
+
+                # f(p) = sum(p.^2) / 2, analytical grad = p
+                f(p) = sum(p .^ 2) / 2
+
+                p_cpu = Float64[1.0, 2.0, 3.0]
+                p = CuArray(p_cpu)
+
+                grad = ManifoldDiff.gradient(M, f, p, backend)
+                @test grad isa CuArray{Float64}
+                @test isapprox(Array(grad), p_cpu; atol=1e-10)
+            end
+
+            @testset "Euclidean gradient — Zygote Float32" begin
+                M = Euclidean(3)
+                backend = ManifoldDiff.RiemannianProjectionBackend(AutoZygote())
+
+                f(p) = sum(p .^ 2) / 2f0
+
+                p_cpu = Float32[1.0, 2.0, 3.0]
+                p = CuArray(p_cpu)
+
+                grad = ManifoldDiff.gradient(M, f, p, backend)
+                @test grad isa CuArray{Float32}
+                @test isapprox(Array(grad), p_cpu; atol=1e-4)
+            end
+
+            @testset "Sphere gradient — Zygote" begin
+                M = Sphere(2)
+                backend = ManifoldDiff.RiemannianProjectionBackend(AutoZygote())
+
+                # Use dot product instead of p[1] to avoid scalar indexing.
+                # f(p) = dot(a, p) where a = [1,0,0]
+                # Euclidean grad = a, Riemannian grad = a - dot(a,p)*p
+                a_cpu = [1.0, 0.0, 0.0]
+                a = CuArray(a_cpu)
+                f(p) = dot(a, p)
+
+                p_cpu = [sqrt(2.0) / 2, 0.0, sqrt(2.0) / 2]
+                p = CuArray(p_cpu)
+                expected_grad = a_cpu - dot(a_cpu, p_cpu) * p_cpu
+
+                grad = ManifoldDiff.gradient(M, f, p, backend)
+                @test grad isa CuArray{Float64}
+                # Should be in tangent space: dot(grad, p) ≈ 0
+                @test abs(dot(Array(grad), p_cpu)) < 1e-10
+                @test isapprox(Array(grad), expected_grad; atol=1e-10)
+            end
+
+            @testset "CPU vs GPU gradient equivalence — Zygote" begin
+                M = Euclidean(5)
+                backend = ManifoldDiff.RiemannianProjectionBackend(AutoZygote())
+
+                f(p) = sum(p .^ 2) / 2
+                # Fixed point instead of `randn(5)` so every run checks the same input.
+                p_cpu = [0.5, -1.25, 2.0, 0.0, 3.75]
+                p_gpu = CuArray(p_cpu)
+
+                grad_cpu = ManifoldDiff.gradient(M, f, p_cpu, backend)
+                grad_gpu = ManifoldDiff.gradient(M, f, p_gpu, backend)
+                @test grad_gpu isa CuArray{Float64}
+                @test isapprox(Array(grad_gpu), grad_cpu; atol=1e-12)
+            end
+
+            @testset "Quadratic objective — Zygote" begin
+                M = Euclidean(4)
+                backend = ManifoldDiff.RiemannianProjectionBackend(AutoZygote())
+
+                target = CuArray([1.0, 2.0, 3.0, 4.0])
+                f(p) = sum((p .- target) .^ 2) / 2
+
+                p_cpu = zeros(4)
+                p = CuArray(p_cpu)
+
+                grad = ManifoldDiff.gradient(M, f, p, backend)
+                @test grad isa CuArray{Float64}
+                # grad = p - target = [0,0,0,0] - [1,2,3,4] = [-1,-2,-3,-4]
+                @test isapprox(Array(grad), p_cpu .- Array(target); atol=1e-10)
+            end
+        else
+            @info "Zygote not available, skipping GPU AD tests"
+        end
+    else
+        @info "CUDA not functional, skipping ManifoldDiff GPU AD tests"
+    end
+end