Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 6 additions & 18 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name = "ManifoldsGPU"
uuid = "007d1224-8888-47ee-87d0-87e096ff9b5b"
version = "0.1.0-DEV"
version = "0.1.0"
authors = ["Mateusz Baran <mateuszbaran89@gmail.com>", "Shiwen An <sweynan@icloud.com>", "and contributors"]

[workspace]
projects = ["test", "docs"]

[deps]
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd"
Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
Runic = "62bfec6d-59d7-401d-8490-b29ee721c001"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Expand All @@ -20,20 +20,8 @@ ManifoldsGPUCUDAExt = "CUDA"

[compat]
CUDA = "5.9.6"
GPUArrays = "11"
JLArrays = "0.3"
LinearAlgebra = "1.10"
ManifoldDiff = "0.4.5"
Manifolds = "0.11.12"
ManifoldsBase = "2.3.1"
Runic = "1.5.1"
julia = "1.10.10"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "CUDA", "GPUArrays", "JLArrays", "Random"]
julia = "1.10"
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# ManifoldsGPU

[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliamanifolds.github.io/ManifoldsGPU.jl/dev/)
[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
[![code style: runic](https://img.shields.io/badge/code_style-%E1%9A%B1%E1%9A%A2%E1%9A%BE%E1%9B%81%E1%9A%B2-black)](https://github.com/fredrikekre/Runic.jl)

General GPU/CUDA support for the JuliaManifolds ecosystem.

Expand Down
63 changes: 33 additions & 30 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,37 @@ ManifoldsGPU.ManifoldsGPU

Device: NVIDIA GeForce RTX 5070 Ti, eltype: Float32/ComplexF32

| Manifold | Operation | CPU median [ms] | GPU median [ms] | Speedup CPU/GPU | Relative error |
| Manifold | Operation | CPU median [ms] | GPU median [ms] | Speedup CPU/GPU | Error |
| --- | --- | ---: | ---: | ---: | ---: |
| Euclidean(32, 16, 2048) | exp | 0.36 | 0.17 | 2.17 | 0.0 |
| Euclidean(32, 16, 2048) | log! | 0.36 | 0.16 | 2.19 | 0.0 |
| Euclidean(32, 16, 2048) | inner | 0.19 | 0.14 | 1.36 | 9.357e-8 |
| Euclidean(32, 16, 2048) | norm | 0.12 | 0.16 | 0.76 | 8.423e-8 |
| Euclidean(32, 16, 2048) | project! | 0.25 | 0.13 | 1.93 | 0.0 |
| PowerManifold(Sphere(31), 2048) | exp | 0.05 | 46.57 | 0.0 | 6.877e-8 |
| PowerManifold(Sphere(31), 2048) | log! | 0.16 | 82.22 | 0.0 | 4.262e-8 |
| PowerManifold(Sphere(31), 2048) | inner | 0.02 | 0.12 | 0.13 | 5.86e-7 |
| PowerManifold(Sphere(31), 2048) | norm | 0.02 | 0.12 | 0.15 | 1.064e-7 |
| PowerManifold(Sphere(31), 2048) | project! | 0.03 | 41.71 | 0.0 | 2.813e-8 |
| PowerManifold(Rotations(32), 2048) | exp | 36.47 | 2.36 | 15.47 | 2.594e-6 |
| PowerManifold(Rotations(32), 2048) | log! | 570.16 | 78.98 | 7.22 | 9.157e-5 |
| PowerManifold(Rotations(32), 2048) | inner | 0.38 | 0.15 | 2.58 | 4.708e-6 |
| PowerManifold(Rotations(32), 2048) | norm | 1.2 | 0.15 | 7.82 | 9.132e-7 |
| PowerManifold(Rotations(32), 2048) | project! | 21.69 | 0.23 | 94.53 | 3.644e-7 |
| PowerManifold(Rotations(32), 2048) | retract_fused!(PolarRetraction) | 116.78 | 5.21 | 22.41 | 2.555e-6 |
| PowerManifold(UnitaryMatrices(32), 2048) | exp | 85.95 | 7.66 | 11.21 | 1.957e-6 |
| PowerManifold(UnitaryMatrices(32), 2048) | log! | 739.96 | 69.31 | 10.68 | 0.0001844 |
| PowerManifold(UnitaryMatrices(32), 2048) | inner | 0.76 | 58.76 | 0.01 | 5.979e-5 |
| PowerManifold(UnitaryMatrices(32), 2048) | norm | 1.73 | 45.7 | 0.04 | 1.516e-6 |
| PowerManifold(UnitaryMatrices(32), 2048) | project! | 31.55 | 0.36 | 88.48 | 5.512e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | exp | 69.77 | 5.36 | 13.01 | 7.023e-5 |
| PowerManifold(Grassmann(32, 16), 2048) | log! | 57.62 | 3.3 | 17.48 | 2.332e-5 |
| PowerManifold(Grassmann(32, 16), 2048) | inner | 0.19 | 0.14 | 1.41 | 8.675e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | norm | 0.86 | 0.13 | 6.58 | 2.772e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | project! | 0.96 | 0.19 | 4.97 | 1.303e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | retract_fused!(PolarRetraction) | 40.89 | 2.69 | 15.18 | 1.338e-6 |
| PowerManifold(Stiefel(32, 16), 2048) | exp(ExponentialRetraction) | 70.67 | 3.52 | 20.07 | 1.164e-6 |
| PowerManifold(Stiefel(32, 16), 2048) | retract_fused!(PolarRetraction) | 43.1 | 2.85 | 15.13 | 1.37e-6 |
| Euclidean(32, 16, 2048) | exp | 0.35 | 0.17 | 2.06 | 0.0 |
| Euclidean(32, 16, 2048) | log! | 0.35 | 0.17 | 2.05 | 0.0 |
| Euclidean(32, 16, 2048) | inner | 0.19 | 0.14 | 1.34 | 9.357e-8 |
| Euclidean(32, 16, 2048) | norm | 0.13 | 0.16 | 0.83 | 8.423e-8 |
| Euclidean(32, 16, 2048) | project! | 0.23 | 0.12 | 1.89 | 0.0 |
| PowerManifold(Sphere(31), 2048) | exp | 0.05 | 0.14 | 0.35 | 7.092e-8 |
| PowerManifold(Sphere(31), 2048) | log! | 0.08 | 0.37 | 0.23 | 5.125e-8 |
| PowerManifold(Sphere(31), 2048) | inner | 0.02 | 0.13 | 0.14 | 5.86e-7 |
| PowerManifold(Sphere(31), 2048) | norm | 0.02 | 0.13 | 0.15 | 1.064e-7 |
| PowerManifold(Sphere(31), 2048) | project! | 0.03 | 0.15 | 0.18 | 2.819e-8 |
| PowerManifold(Rotations(32), 2048) | exp | 36.19 | 2.35 | 15.38 | 2.594e-6 |
| PowerManifold(Rotations(32), 2048) | log! | 565.72 | 74.44 | 7.6 | 9.157e-5 |
| PowerManifold(Rotations(32), 2048) | inner | 0.41 | 0.25 | 1.65 | 4.708e-6 |
| PowerManifold(Rotations(32), 2048) | norm | 1.36 | 0.14 | 9.53 | 1.109e-6 |
| PowerManifold(Rotations(32), 2048) | project! | 20.46 | 0.22 | 91.22 | 3.644e-7 |
| PowerManifold(Rotations(32), 2048) | retract_fused!(PolarRetraction) | 115.51 | 4.89 | 23.62 | 2.555e-6 |
| PowerManifold(Rotations(32), 2048) | retract_fused!(QRRetraction) | 90.11 | 0.83 | 108.95 | 3.204e-7 |
| PowerManifold(UnitaryMatrices(32), 2048) | exp | 85.89 | 8.07 | 10.64 | 1.957e-6 |
| PowerManifold(UnitaryMatrices(32), 2048) | log! | 729.57 | 69.89 | 10.44 | 0.0001844 |
| PowerManifold(UnitaryMatrices(32), 2048) | inner | 0.83 | 56.26 | 0.01 | 5.979e-5 |
| PowerManifold(UnitaryMatrices(32), 2048) | norm | 1.74 | 44.17 | 0.04 | 1.516e-6 |
| PowerManifold(UnitaryMatrices(32), 2048) | project! | 31.42 | 0.35 | 90.36 | 5.512e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | exp | 69.72 | 5.27 | 13.22 | 7.023e-5 |
| PowerManifold(Grassmann(32, 16), 2048) | log! | 57.99 | 3.36 | 17.27 | 2.332e-5 |
| PowerManifold(Grassmann(32, 16), 2048) | inner | 0.2 | 0.13 | 1.54 | 8.056e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | norm | 0.81 | 0.14 | 5.95 | 3.696e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | project! | 1.0 | 0.23 | 4.45 | 1.303e-7 |
| PowerManifold(Grassmann(32, 16), 2048) | retract_fused!(PolarRetraction) | 40.67 | 2.99 | 13.59 | 0.0001873 |
| PowerManifold(Grassmann(32, 16), 2048) | retract_fused!(QRRetraction) | 17.84 | 0.72 | 24.63 | 4.623e-5 |
| PowerManifold(Stiefel(32, 16), 2048) | exp(ExponentialRetraction) | 70.99 | 3.56 | 19.91 | 1.164e-6 |
| PowerManifold(Stiefel(32, 16), 2048) | retract_fused!(PolarRetraction) | 43.66 | 2.87 | 15.21 | 1.37e-6 |
| PowerManifold(Stiefel(32, 16), 2048) | retract_fused!(QRRetraction) | 18.28 | 0.71 | 25.89 | 1.885e-7 |
58 changes: 58 additions & 0 deletions ext/ManifoldsGPUCUDAExt/Sphere.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,61 @@ function ManifoldsBase.norm(
) where {T <: Real}
return sqrt(dot(X, X))
end

# Exponential map for a power manifold of real spheres stored in CUDA arrays.
#
# Writes `cos(t) * p + usinc(t) * X` into `q` and returns `q`, where `t` is the
# Euclidean norm of `X` reduced along the first array dimension.
# NOTE(review): presumably every slice along dimension 1 holds one sphere
# point / tangent vector (ArrayPowerRepresentation layout) — confirm.
function ManifoldsBase.exp!(
        ::PowerManifold{ℝ, <:Sphere{ℝ}, <:Tuple, ArrayPowerRepresentation},
        q::CuArray{T},
        p::CuArray{T},
        X::CuArray{T},
    ) where {T <: Real}
    # One norm per slice; its leading dimension has size 1, so it broadcasts
    # against `p` and `X` below.
    tangent_norm = sqrt.(sum(abs2, X; dims = 1))
    @. q = cos(tangent_norm) * p + Manifolds.usinc(tangent_norm) * X
    return q
end

# Logarithmic map for a power manifold of real spheres stored in CUDA arrays.
#
# For every column, the result written to `X` is
#   * the regular formula `(q - cos(t) * p) / usinc(t)` with `cos(t)` the clamped
#     inner product of `p` and `q`, or
#   * when `cos(t)` is within `sqrt(eps(T))` of -1, a fixed substitute direction
#     rescaled to Euclidean length π.
# The combined result is finally passed through `project!(M, X, p, X)`, whose
# return value is returned.
function ManifoldsBase.log!(
        M::PowerManifold{ℝ, <:Sphere{ℝ}, <:Tuple, ArrayPowerRepresentation},
        X::CuArray{T},
        p::CuArray{T},
        q::CuArray{T},
    ) where {T <: Real}
    unit = one(T)
    tol = sqrt(eps(T))

    # Column-wise inner products, clamped into the domain of `acos`.
    cos_angle = clamp.(sum(p .* q; dims = 1), -unit, unit)
    angle = acos.(cos_angle)
    generic = (q .- cos_angle .* p) ./ Manifolds.usinc.(angle)

    # Mask of columns where the regular formula divides by (almost) zero.
    is_antipodal = abs.(cos_angle .+ unit) .<= tol

    # Substitute direction: first unit vector, or the second one for columns
    # whose first coordinate of `p` is within `tol` of 1.
    direction = CUDA.zeros(T, size(p))
    direction[1, :] .= unit
    if size(p, 1) > 1
        first_is_one = abs.(p[1:1, :] .- unit) .<= tol
        swap = T.(first_is_one[1, :])
        direction[1, :] .-= swap
        direction[2, :] .= swap
    end

    # Remove the component along `p`, then rescale each column to length π.
    fallback = direction .- p .* sum(p .* direction; dims = 1)
    fallback_norm = sqrt.(sum(abs2, fallback; dims = 1))
    fallback .*= T(π) ./ fallback_norm

    @. X = ifelse(is_antipodal, fallback, generic)
    return project!(M, X, p, X)
end

# Tangent-space projection for a power manifold of real spheres on CUDA arrays.
#
# Writes `X - p * s` into `Y` and returns `Y`, where `s` is the sum of the
# elementwise product `p .* X` reduced along the first array dimension.
function ManifoldsBase.project!(
        ::PowerManifold{ℝ, <:Sphere{ℝ}, <:Tuple, ArrayPowerRepresentation},
        Y::CuArray{T},
        p::CuArray{T},
        X::CuArray{T},
    ) where {T <: Real}
    # Evaluated before `Y` is written, so `Y` may be the same array as `X`.
    along_p = sum(p .* X; dims = 1)
    @. Y = X - p * along_p
    return Y
end

# Point projection for a power manifold of real spheres on CUDA arrays.
#
# Divides `p` by its Euclidean norm taken along the first array dimension,
# stores the result in `q`, and returns `q`.
function ManifoldsBase.project!(
        ::PowerManifold{ℝ, <:Sphere{ℝ}, <:Tuple, ArrayPowerRepresentation},
        q::CuArray{T},
        p::CuArray{T},
    ) where {T <: Real}
    slice_norm = sqrt.(sum(abs2, p; dims = 1))
    @. q = p / slice_norm
    return q
end
27 changes: 27 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd"
Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
ManifoldsGPU = "007d1224-8888-47ee-87d0-87e096ff9b5b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[sources]
ManifoldsGPU = {path = ".."}

[compat]
Aqua = "0.8"
CUDA = "5.9.6"
GPUArrays = "11"
JLArrays = "0.3"
LinearAlgebra = "1.10"
ManifoldDiff = "0.4.5"
Manifolds = "0.11.12"
ManifoldsBase = "2.3.1"
Test = "1.10"
julia = "1.10"
100 changes: 100 additions & 0 deletions test/cuda/test_sphere.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Manifolds, ManifoldsGPU, Test, Random, CUDA

@testset "Sphere CUDA" begin
@testset "inner and norm" begin
Random.seed!(70)
Expand Down Expand Up @@ -46,4 +48,102 @@
@test isapprox(i_gpu, i_cpu; atol = 1.0f-4, rtol = 1.0f-4)
@test isapprox(n_gpu, n_cpu; atol = 1.0f-4, rtol = 1.0f-4)
end

# Point projection: the CUDA result must match the CPU result for both
# element types, each with its own tolerance.
@testset "project!" begin
    cases = (
        (Float32, 1.0f-5, 1.0f-5),
        (Float64, 1.0e-12, 1.0e-12),
    )
    for (T, atol, rtol) in cases
        Random.seed!(72)
        MP = PowerManifold(Sphere(7), 32)

        # Host reference.
        p = T.(rand(MP))
        q_cpu = similar(p)
        project!(MP, q_cpu, p)

        # Same input on the device.
        p_cu = CuArray(p)
        q_cu = similar(p_cu)
        project!(MP, q_cu, p_cu)

        @test isapprox(Array(q_cu), q_cpu; atol, rtol)
    end
end

# Tangent projection: compare the CUDA method against the CPU implementation
# on an arbitrary (not necessarily tangent) input array.
@testset "project! tangent" begin
    cases = (
        (721, Float32, 1.0f-5, 1.0f-5),
        (722, Float64, 1.0e-12, 1.0e-12),
    )
    for (seed, T, atol, rtol) in cases
        Random.seed!(seed)
        MP = PowerManifold(Sphere(7), 32)

        p = T.(rand(MP))
        X = T.(randn(size(p)))

        # Host reference.
        Y_cpu = similar(p)
        project!(MP, Y_cpu, p, X)

        # Same inputs on the device.
        p_cu, X_cu = CuArray(p), CuArray(X)
        Y_cu = similar(p_cu)
        project!(MP, Y_cu, p_cu, X_cu)

        @test isapprox(Array(Y_cu), Y_cpu; atol, rtol)
    end
end

# Exponential map: the CUDA result must agree with the CPU result.
@testset "exp!" begin
    cases = (
        (73, Float32, 1.0f-4, 1.0f-4),
        (74, Float64, 1.0e-10, 1.0e-10),
    )
    for (seed, T, atol, rtol) in cases
        Random.seed!(seed)
        MP = PowerManifold(Sphere(7), 32)

        p = T.(rand(MP))
        X = T.(rand(MP; vector_at = p))

        # Host reference.
        q_cpu = similar(p)
        exp!(MP, q_cpu, p, X)

        # Same inputs on the device.
        p_cu, X_cu = CuArray(p), CuArray(X)
        q_cu = similar(p_cu)
        exp!(MP, q_cu, p_cu, X_cu)

        @test isapprox(Array(q_cu), q_cpu; atol, rtol)
    end
end

# Logarithmic map: the CUDA result must agree with the CPU result for two
# randomly drawn points.
@testset "log!" begin
    cases = (
        (75, Float32, 5.0f-4, 5.0f-4),
        (76, Float64, 1.0e-9, 1.0e-9),
    )
    for (seed, T, atol, rtol) in cases
        Random.seed!(seed)
        MP = PowerManifold(Sphere(7), 32)

        p = T.(rand(MP))
        q = T.(rand(MP))

        # Host reference.
        X_cpu = similar(p)
        log!(MP, X_cpu, p, q)

        # Same inputs on the device.
        p_cu, q_cu = CuArray(p), CuArray(q)
        X_cu = similar(p_cu)
        log!(MP, X_cu, p_cu, q_cu)

        @test isapprox(Array(X_cu), X_cpu; atol, rtol)
    end
end
end
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ using GPUArrays
else
@info "CUDA not available, skipping CUDA tests"
end

include(joinpath(@__DIR__, "test_aqua.jl"))
end
5 changes: 5 additions & 0 deletions test/test_aqua.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using Aqua, ManifoldsGPU, Test

# Run Aqua.jl's `test_all` checks against the ManifoldsGPU module.
@testset "Aqua.jl" begin
Aqua.test_all(ManifoldsGPU)
end