Error in testset examples/batchmatmul:
Error During Test at /home/anton/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334
Got exception outside of a @test
LoadError: AssertionError: max diff: 14.4634285
Stacktrace:
[1] verify(data::@NamedTuple{A::CUDA.CuArray{Float32, 3, CUDA.DeviceMemory}, B::CUDA.CuArray{Float32, 3, CUDA.DeviceMemory}, C::CUDA.CuArray{Float32, 3, CUDA.DeviceMemory}, M::Int64, K::Int64, N::Int64, Batch::Int64}, result::@NamedTuple{C::CUDA.CuArray{Float32, 3, CUDA.DeviceMemory}, times::Vector{Float64}})
@ Main.var"##examples/batchmatmul#277".var"##277" ~/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:104
[2] test_batch_matmul(::Type{Float32}, M::Int64, K::Int64, N::Int64, Batch::Int64, tm::Int64, tn::Int64, tk::Int64; name::Nothing)
@ Main.var"##examples/batchmatmul#277".var"##277" ~/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:140
[3] test_batch_matmul
@ ~/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:135 [inlined]
[4] main()
@ Main.var"##examples/batchmatmul#277".var"##277" ~/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:148
[5] top-level scope
@ ~/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:160
[6] include(mapexpr::Function, mod::Module, _path::String)
@ Base ./Base.jl:307
[7] IncludeInto
@ ./Base.jl:308 [inlined]
[8] #2
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:34 [inlined]
[9] (::Base.RedirectStdStream)(thunk::Main.var"##examples/batchmatmul#277".var"##277".var"#2#3", stream::Base.DevNull)
@ Base ./stream.jl:1464
[10] top-level scope
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:33
[11] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[12] (::Main.var"##examples/batchmatmul#277".var"#2#3")()
@ Main.var"##examples/batchmatmul#277" ~/.julia/packages/cuTile/shvBr/test/runtests.jl:32
[13] cd(f::Main.var"##examples/batchmatmul#277".var"#2#3", dir::String)
@ Base.Filesystem ./file.jl:112
[14] macro expansion
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:30 [inlined]
[15] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[16] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[17] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[18] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[19] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334 [inlined]
[20] macro expansion
@ ./timing.jl:697 [inlined]
[21] top-level scope
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:333
[22] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[23] (::ParallelTestRunner.var"#inner#runtest##0"{Expr, String, Expr, Float64})()
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:326
[24] runtest(f::Expr, name::String, init_code::Expr, start_time::Float64)
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:359
[25] (::var"#handle##0#handle##1"{Sockets.TCPSocket, UInt64, Bool, @Kwargs{}, Tuple{typeof(ParallelTestRunner.runtest), Expr, String, Expr, Float64}, typeof(invokelatest)})()
@ Main ~/.julia/packages/Malt/yA40d/src/worker.jl:120
in expression starting at /home/anton/.julia/packages/cuTile/shvBr/examples/batchmatmul.jl:160
Error in testset examples/matmul:
Error During Test at /home/anton/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334
Got exception outside of a @test
LoadError: AssertionError: max diff: 14.111668
Stacktrace:
[1] verify(data::@NamedTuple{A::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, B::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, C::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, M::Int64, N::Int64, K::Int64}, result::@NamedTuple{C::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, times::Vector{Float64}})
@ Main.var"##examples/matmul#277".var"##280" ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:105
[2] test_matmul(::Type{Float32}, M::Int64, N::Int64, K::Int64, tm::Int64, tn::Int64, tk::Int64; name::Nothing)
@ Main.var"##examples/matmul#277".var"##280" ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:141
[3] test_matmul(::Type{Float32}, M::Int64, N::Int64, K::Int64, tm::Int64, tn::Int64, tk::Int64)
@ Main.var"##examples/matmul#277".var"##280" ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:136
[4] main(T::Type)
@ Main.var"##examples/matmul#277".var"##280" ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:149
[5] main()
@ Main.var"##examples/matmul#277".var"##280" ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:146
[6] top-level scope
@ ~/.julia/packages/cuTile/shvBr/examples/matmul.jl:162
[7] include(mapexpr::Function, mod::Module, _path::String)
@ Base ./Base.jl:307
[8] IncludeInto
@ ./Base.jl:308 [inlined]
[9] #2
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:34 [inlined]
[10] (::Base.RedirectStdStream)(thunk::Main.var"##examples/matmul#277".var"##280".var"#2#3", stream::Base.DevNull)
@ Base ./stream.jl:1464
[11] top-level scope
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:33
[12] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[13] (::Main.var"##examples/matmul#277".var"#2#3")()
@ Main.var"##examples/matmul#277" ~/.julia/packages/cuTile/shvBr/test/runtests.jl:32
[14] cd(f::Main.var"##examples/matmul#277".var"#2#3", dir::String)
@ Base.Filesystem ./file.jl:112
[15] macro expansion
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:30 [inlined]
[16] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[17] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[18] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[19] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[20] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334 [inlined]
[21] macro expansion
@ ./timing.jl:697 [inlined]
[22] top-level scope
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:333
[23] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[24] (::ParallelTestRunner.var"#inner#runtest##0"{Expr, String, Expr, Float64})()
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:326
[25] runtest(f::Expr, name::String, init_code::Expr, start_time::Float64)
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:359
[26] (::var"#handle##0#handle##1"{Sockets.TCPSocket, UInt64, Bool, @Kwargs{}, Tuple{typeof(ParallelTestRunner.runtest), Expr, String, Expr, Float64}, typeof(invokelatest)})()
@ Main ~/.julia/packages/Malt/yA40d/src/worker.jl:120
in expression starting at /home/anton/.julia/packages/cuTile/shvBr/examples/matmul.jl:162
Error in testset scalar indexing as loop bound:
Test Failed at /home/anton/.julia/packages/cuTile/shvBr/test/execution/control_flow.jl:105
Expression: out_cpu[(bid - 1) * 16 + 1:bid * 16] ≈ expected
Evaluated: Float32[0.879777, 0.93038654, 0.50196046, 0.5840501, 0.20408784, 0.59009755, 0.9062521, 0.41234535, 0.08410301, 0.84389627, 0.3908352, 0.8718339, 0.72525173, 0.20444797, 0.21902405, 0.5356413] ≈ Float32[0.92256486, 1.5014486, 0.9544239, 1.0048158, 0.55284977, 0.85161656, 1.8732452, 0.86960614, 1.067309, 0.95421255, 0.5070774, 1.0760301, 1.5747511, 0.402138, 0.7896106, 0.7162036]
Error in testset scalar indexing as loop bound:
Test Failed at /home/anton/.julia/packages/cuTile/shvBr/test/execution/control_flow.jl:105
Expression: out_cpu[(bid - 1) * 16 + 1:bid * 16] ≈ expected
Evaluated: Float32[0.92256486, 1.5014486, 0.9544239, 1.0048158, 0.55284977, 0.85161656, 1.8732452, 0.86960614, 1.067309, 0.95421255, 0.5070774, 1.0760301, 1.5747511, 0.402138, 0.7896106, 0.7162036] ≈ Float32[1.3424685, 2.210954, 1.4987373, 1.7911042, 1.2071617, 1.2872905, 2.5009568, 1.2338834, 1.7725074, 1.8615453, 1.4450717, 1.7008188, 2.5657296, 0.9148998, 1.5679075, 0.80467093]
Error in testset scalar indexing as loop bound:
Test Failed at /home/anton/.julia/packages/cuTile/shvBr/test/execution/control_flow.jl:105
Expression: out_cpu[(bid - 1) * 16 + 1:bid * 16] ≈ expected
Evaluated: Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] ≈ Float32[0.879777, 0.93038654, 0.50196046, 0.5840501, 0.20408784, 0.59009755, 0.9062521, 0.41234535, 0.08410301, 0.84389627, 0.3908352, 0.8718339, 0.72525173, 0.20444797, 0.21902405, 0.5356413]
Error in testset examples/layernorm:
Error During Test at /home/anton/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334
Got exception outside of a @test
LoadError: AssertionError: Y mismatch
Stacktrace:
[1] verify(data::@NamedTuple{X::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, W::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, B::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, Y::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, Mean::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, Rstd::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, DY::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, DX::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, DW_partial::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, DB_partial::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, Locks::CUDA.CuArray{Int64, 1, CUDA.DeviceMemory}, FINAL_DW::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, FINAL_DB::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, M::Int64, N::Int64, eps::Float32, GROUP_SIZE_M::Int64}, result::@NamedTuple{Y::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, Mean::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, Rstd::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, DX::CUDA.CuArray{Float32, 2, CUDA.DeviceMemory}, FINAL_DW::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, FINAL_DB::CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}, times_fwd::Vector{Float64}, times_bwd::Vector{Float64}})
@ Main.var"##examples/layernorm#277".var"##279" ~/.julia/packages/cuTile/shvBr/examples/layernorm.jl:360
[2] test_layernorm(M::Int64, N::Int64, TILE_N::Int64; TILE_M::Int64, eps::Float32, name::Nothing)
@ Main.var"##examples/layernorm#277".var"##279" ~/.julia/packages/cuTile/shvBr/examples/layernorm.jl:380
[3] test_layernorm
@ ~/.julia/packages/cuTile/shvBr/examples/layernorm.jl:375 [inlined]
[4] main()
@ Main.var"##examples/layernorm#277".var"##279" ~/.julia/packages/cuTile/shvBr/examples/layernorm.jl:393
[5] top-level scope
@ ~/.julia/packages/cuTile/shvBr/examples/layernorm.jl:400
[6] include(mapexpr::Function, mod::Module, _path::String)
@ Base ./Base.jl:307
[7] IncludeInto
@ ./Base.jl:308 [inlined]
[8] #2
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:34 [inlined]
[9] (::Base.RedirectStdStream)(thunk::Main.var"##examples/layernorm#277".var"##279".var"#2#3", stream::Base.DevNull)
@ Base ./stream.jl:1464
[10] top-level scope
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:33
[11] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[12] (::Main.var"##examples/layernorm#277".var"#2#3")()
@ Main.var"##examples/layernorm#277" ~/.julia/packages/cuTile/shvBr/test/runtests.jl:32
[13] cd(f::Main.var"##examples/layernorm#277".var"#2#3", dir::String)
@ Base.Filesystem ./file.jl:112
[14] macro expansion
@ ~/.julia/packages/cuTile/shvBr/test/runtests.jl:30 [inlined]
[15] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[16] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[17] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:335 [inlined]
[18] macro expansion
@ ~/.julia/juliaup/julia-1.12.5+0.x64.linux.gnu/share/julia/stdlib/v1.12/Test/src/Test.jl:1776 [inlined]
[19] macro expansion
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:334 [inlined]
[20] macro expansion
@ ./timing.jl:697 [inlined]
[21] top-level scope
@ ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:333
[22] eval(m::Module, e::Any)
@ Core ./boot.jl:489
[23] (::ParallelTestRunner.var"#inner#runtest##0"{Expr, String, Expr, Float64})()
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:326
[24] runtest(f::Expr, name::String, init_code::Expr, start_time::Float64)
@ ParallelTestRunner ~/.julia/packages/ParallelTestRunner/EzCGZ/src/ParallelTestRunner.jl:359
[25] (::var"#handle##0#handle##1"{Sockets.TCPSocket, UInt64, Bool, @Kwargs{}, Tuple{typeof(ParallelTestRunner.runtest), Expr, String, Expr, Float64}, typeof(invokelatest)})()
@ Main ~/.julia/packages/Malt/yA40d/src/worker.jl:120
in expression starting at /home/anton/.julia/packages/cuTile/shvBr/examples/layernorm.jl:400
ERROR: LoadError: Test run finished with errors
in expression starting at /home/anton/.julia/packages/cuTile/shvBr/test/runtests.jl:54
ERROR: Package cuTile errored during testing
EDIT: I originally thought this was GPU architecture-dependent as it was inconsistent across my machines, but I found that with IRStructurizer v0.2.0 I get:
Click to view log
So the compat entry should be updated. This is a rare problem because Julia packages follow semver and we're most often on the latest possible version, but I think there's some issue with the General registry at the moment. See https://julialang.slack.com/archives/C67910KEH/p1773921174958079