Merged
Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/src/texture.jl b/src/texture.jl
index fc1590423..bbdca57db 100644
--- a/src/texture.jl
+++ b/src/texture.jl
@@ -97,7 +97,7 @@ end
# idempotency
CuTextureArray{T,N}(xs::CuTextureArray{T,N}) where {T,N} = xs
-CuTextureArray(xs::CuTextureArray{T,N}) where {T,N} = xs
+CuTextureArray(xs::CuTextureArray{T, N}) where {T, N} = xs
CuTextureArray(A::AbstractArray{T,N}) where {T,N} = CuTextureArray{T,N}(A)
diff --git a/test/core/cudadrv.jl b/test/core/cudadrv.jl
index 2372cb568..7772cf9cb 100644
--- a/test/core/cudadrv.jl
+++ b/test/core/cudadrv.jl
@@ -435,15 +435,15 @@ nb = sizeof(data)
typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
typed_pointer(buf::CUDA.HostMemory, T) = convert(Ptr{T}, buf)
-@testset "showing" begin
- for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
- dummy = CUDA.alloc(Ty, 0)
- @test startswith(sprint(show, dummy), str)
- CUDA.free(dummy)
+ @testset "showing" begin
+ for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
+ dummy = CUDA.alloc(Ty, 0)
+ @test startswith(sprint(show, dummy), str)
+ CUDA.free(dummy)
+ end
end
-end
-@testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
+ @testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
dstTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory]
dummy = CUDA.alloc(srcTy, 0)
@@ -479,7 +479,7 @@ end
# test device with context in which pointer was allocated.
@test device(typed_pointer(src, T)) == device()
- @test context(typed_pointer(src, T)) == context()
+ @test context(typed_pointer(src, T)) == context()
if !memory_pools_supported(device())
# NVIDIA bug #3319609
@test context(typed_pointer(src, T)) == context()
@@ -503,7 +503,7 @@ end
CUDA.free(dst)
end
-@testset "pointer attributes" begin
+ @testset "pointer attributes" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
attribute!(typed_pointer(src, T), CUDA.POINTER_ATTRIBUTE_SYNC_MEMOPS, 0)
@@ -511,7 +511,7 @@ end
CUDA.free(src)
end
-@testset "asynchronous operations" begin
+ @testset "asynchronous operations" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
unsafe_copyto!(typed_pointer(src, T), pointer(data), N; async=true)
@@ -521,7 +521,7 @@ end
CUDA.free(src)
end
-@testset "pinned memory" begin
+ @testset "pinned memory" begin
# create a pinned and mapped buffer
src = CUDA.alloc(CUDA.HostMemory, nb, CUDA.MEMHOSTALLOC_DEVICEMAP)
@@ -553,16 +553,16 @@ if attribute(device(), CUDA.DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED) != 0
CUDA.unregister(src)
- # with a RefValue
- src = Ref{T}(T(42))
- CUDA.pin(src)
- cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
- ref = Array{T}(undef, 1)
- unsafe_copyto!(pointer(ref), cpu_ptr, 1)
- @test ref == [T(42)]
+ # with a RefValue
+ src = Ref{T}(T(42))
+ CUDA.pin(src)
+ cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
+ ref = Array{T}(undef, 1)
+ unsafe_copyto!(pointer(ref), cpu_ptr, 1)
+ @test ref == [T(42)]
end
-@testset "unified memory" begin
+ @testset "unified memory" begin
src = CUDA.alloc(CUDA.UnifiedMemory, nb)
@test_throws BoundsError CUDA.prefetch(src, 2*nb; device=CUDA.DEVICE_CPU)
@@ -583,7 +583,7 @@ end
CUDA.free(src)
end
-@testset "3d memcpy" begin
+ @testset "3d memcpy" begin
# TODO: use cuMemAllocPitch (and put pitch in buffer?) to actually get benefit from this
data = collect(reshape(1:27, 3, 3, 3)) |
Codecov Report
All modified and coverable lines are covered by tests ✅
Additional details and impacted files
@@ Coverage Diff @@
## master #2684 +/- ##
==========================================
+ Coverage 82.57% 82.86% +0.28%
==========================================
Files 153 153
Lines 13606 13606
==========================================
+ Hits 11235 11274 +39
+ Misses 2371 2332 -39
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
Contributor
There was a problem hiding this comment.
CUDA.jl Benchmarks
Details
| Benchmark suite | Current: dbb2215 | Previous: 6bf72dd | Ratio |
|---|---|---|---|
latency/precompile |
46183641505 ns |
46450283323 ns |
0.99 |
latency/ttfp |
7001382825 ns |
7028014177 ns |
1.00 |
latency/import |
3651822156 ns |
3667348189 ns |
1.00 |
integration/volumerhs |
9616637.5 ns |
9625836 ns |
1.00 |
integration/byval/slices=1 |
146680.5 ns |
146875 ns |
1.00 |
integration/byval/slices=3 |
425045 ns |
424860 ns |
1.00 |
integration/byval/reference |
144900 ns |
144959 ns |
1.00 |
integration/byval/slices=2 |
285974 ns |
285961 ns |
1.00 |
integration/cudadevrt |
103233 ns |
103332 ns |
1.00 |
kernel/indexing |
13949 ns |
14061 ns |
0.99 |
kernel/indexing_checked |
14499 ns |
14775 ns |
0.98 |
kernel/occupancy |
637.1046511627907 ns |
656.859649122807 ns |
0.97 |
kernel/launch |
2004.2 ns |
2090.2 ns |
0.96 |
kernel/rand |
14598 ns |
16643 ns |
0.88 |
array/reverse/1d |
19363 ns |
19602 ns |
0.99 |
array/reverse/2d |
24535 ns |
24553 ns |
1.00 |
array/reverse/1d_inplace |
10719 ns |
11159 ns |
0.96 |
array/reverse/2d_inplace |
12480 ns |
13010 ns |
0.96 |
array/copy |
21175 ns |
20672 ns |
1.02 |
array/iteration/findall/int |
157906 ns |
157732 ns |
1.00 |
array/iteration/findall/bool |
138756 ns |
138853.5 ns |
1.00 |
array/iteration/findfirst/int |
152794 ns |
153651.5 ns |
0.99 |
array/iteration/findfirst/bool |
154428 ns |
153926 ns |
1.00 |
array/iteration/scalar |
70496 ns |
72447 ns |
0.97 |
array/iteration/logical |
213423 ns |
206640.5 ns |
1.03 |
array/iteration/findmin/1d |
41018 ns |
40619 ns |
1.01 |
array/iteration/findmin/2d |
93431 ns |
93219 ns |
1.00 |
array/reductions/reduce/1d |
35842 ns |
34826 ns |
1.03 |
array/reductions/reduce/2d |
40715 ns |
50621 ns |
0.80 |
array/reductions/mapreduce/1d |
32909 ns |
32476 ns |
1.01 |
array/reductions/mapreduce/2d |
41241 ns |
50769 ns |
0.81 |
array/broadcast |
20512 ns |
20427 ns |
1.00 |
array/copyto!/gpu_to_gpu |
13698 ns |
11886 ns |
1.15 |
array/copyto!/cpu_to_gpu |
208479.5 ns |
207751.5 ns |
1.00 |
array/copyto!/gpu_to_cpu |
243344 ns |
245794 ns |
0.99 |
array/accumulate/1d |
108244 ns |
109015 ns |
0.99 |
array/accumulate/2d |
80382 ns |
79626 ns |
1.01 |
array/construct |
1302.7 ns |
1306 ns |
1.00 |
array/random/randn/Float32 |
43238 ns |
43298.5 ns |
1.00 |
array/random/randn!/Float32 |
26422 ns |
26052 ns |
1.01 |
array/random/rand!/Int64 |
26958 ns |
26998 ns |
1.00 |
array/random/rand!/Float32 |
8688.5 ns |
8602.333333333334 ns |
1.01 |
array/random/rand/Int64 |
29725 ns |
29780 ns |
1.00 |
array/random/rand/Float32 |
13025 ns |
12942 ns |
1.01 |
array/permutedims/4d |
61583 ns |
60894 ns |
1.01 |
array/permutedims/2d |
55519.5 ns |
55115 ns |
1.01 |
array/permutedims/3d |
55841.5 ns |
55898 ns |
1.00 |
array/sorting/1d |
2775794 ns |
2776458 ns |
1.00 |
array/sorting/by |
3367253 ns |
3369147.5 ns |
1.00 |
array/sorting/2d |
1084250 ns |
1084406 ns |
1.00 |
cuda/synchronization/stream/auto |
1004 ns |
1025.7 ns |
0.98 |
cuda/synchronization/stream/nonblocking |
6261.8 ns |
6461.2 ns |
0.97 |
cuda/synchronization/stream/blocking |
776.5849056603773 ns |
789.3663366336634 ns |
0.98 |
cuda/synchronization/context/auto |
1160.1 ns |
1164 ns |
1.00 |
cuda/synchronization/context/nonblocking |
6588 ns |
6604.6 ns |
1.00 |
cuda/synchronization/context/blocking |
903.7234042553191 ns |
889.4285714285714 ns |
1.02 |
This comment was automatically generated by workflow using github-action-benchmark.
maleadt
approved these changes
Mar 10, 2025
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Also added some more `@testset` blocks instead of `let`.