Skip to content

Commit c5f61ee

Browse files
vchuravy and claude committed
Add tests for muladd override and LLVM intrinsic fma/muladd codegen
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent ebcd217 commit c5f61ee

2 files changed

Lines changed: 54 additions & 0 deletions

File tree

test/core/codegen.jl

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,31 @@ end
2222
@test !occursin("@__nv_fmaf", ir)
2323
end
2424

25+
@testset "fma uses LLVM intrinsic" begin
    # Kernel that loads the first three elements behind `ptr`, combines them
    # with `fma`, and writes the result back to the first slot.
    function fma_kernel(ptr)
        x = unsafe_load(ptr)
        y = unsafe_load(ptr, 2)
        z = unsafe_load(ptr, 3)
        unsafe_store!(ptr, fma(x, y, z))
        return
    end

    # Each element type is paired with the suffix expected on the intrinsic name.
    for (T, suffix) in ((Float32, "f32"), (Float64, "f64"), (Float16, "f16"))
        ir = sprint() do io
            CUDA.code_llvm(io, fma_kernel, Tuple{Ptr{T}})
        end

        # The typed LLVM intrinsic must be present ...
        @test occursin("llvm.fma.$suffix", ir)
        # ... and no `__nv_fma*` symbol may appear in the IR.
        @test !occursin("__nv_fma", ir)
    end
end
37+
38+
@testset "muladd uses LLVM intrinsic" begin
    # Kernel that combines the first three elements behind `ptr` with `muladd`
    # and stores the result in the first slot.
    function muladd_kernel(ptr)
        unsafe_store!(ptr, muladd(unsafe_load(ptr), unsafe_load(ptr,2), unsafe_load(ptr,3)))
        return
    end

    # Each element type is paired with the suffix expected on the intrinsic name.
    for (T, suffix) in ((Float32, "f32"), (Float64, "f64"), (Float16, "f16"))
        ir = sprint(io->CUDA.code_llvm(io, muladd_kernel, Tuple{Ptr{T}}))
        @test occursin("llvm.fmuladd.$suffix", ir)
        # Mirror the fma test: no `__nv_fma*` symbol should be left in the IR.
        @test !occursin("__nv_fma", ir)
    end
end
49+
2550
@testset "assume" begin
2651
foo(i) = cld(42, i)
2752
ir = sprint(io->CUDA.code_llvm(io, foo, Tuple{Int}))

test/core/device/intrinsics/math.jl

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,35 @@ using SpecialFunctions
8585
end
8686
end
8787

88+
@testset "muladd" begin
    # Broadcasted three-argument muladd, handed to the `testf` helper.
    # NOTE(review): `testf` presumably compares host and device results — confirm.
    op = (x, y, z) -> muladd.(x, y, z)

    for T in (Float16, Float32, Float64)
        # operands taken directly from `rand`
        @test testf(op, rand(T, 1), rand(T, 1), rand(T, 1))
        # second and third operands negated
        @test testf(op, rand(T, 1), -rand(T, 1), -rand(T, 1))
    end
end
94+
95+
@testset "fma/muladd PTX codegen" begin
    # fma and muladd should both lower to a round-to-nearest `fma.rn`
    # instruction of the matching precision in PTX.

    # Writes fma(b[], c[], a[]) back into a[].
    function fma_kernel(a, b, c)
        @inbounds a[] = fma(b[], c[], a[])
        return
    end
    # Writes muladd(b[], c[], a[]) back into a[].
    function muladd_kernel(a, b, c)
        @inbounds a[] = muladd(b[], c[], a[])
        return
    end

    for (T, suffix) in ((Float32, "f32"), (Float64, "f64"))
        # Match the type suffix as well, so an fma of a different precision
        # cannot satisfy the check. `.ftz` is an optional PTX modifier that may
        # sit between the rounding mode and the type.
        pattern = Regex("fma\\.rn(\\.ftz)?\\.$suffix")
        argtypes = NTuple{3,CuDeviceArray{T,1,AS.Global}}

        for kernel in (fma_kernel, muladd_kernel)
            asm = sprint(io->CUDA.code_ptx(io, kernel, argtypes))
            @test occursin(pattern, asm)
        end
    end
end
116+
88117
# something from SpecialFunctions.jl
89118
@testset "erf" begin
90119
@test testf(a->SpecialFunctions.erf.(a), Float32[1.0])

0 commit comments

Comments
 (0)