Skip to content

Commit 441fec5

Browse files
authored
Make CUDA & friends loadable on systems without NVPTX LLVM backend (#3067)
1 parent d95add6 commit 441fec5

3 files changed

Lines changed: 31 additions & 26 deletions

File tree

CUDACore/src/precompile.jl

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,36 @@
1-
@compile_workload begin
2-
# compile a dummy kernel to PTX to precompile the GPUCompiler pipeline.
3-
# this doesn't need a GPU — it only uses LLVM.
4-
let
5-
function _precompile_vadd(a)
6-
i = threadIdx().x
7-
@inbounds a[i] += 1f0
8-
return nothing
9-
end
1+
# `llvm_compat()` requires being able to initialize the NVPTX backend, so we run the
2+
# precompile workload only when that's supported, to be able to load this package also on
3+
# systems where the backend isn't available.
4+
if :NVPTX in LLVM.backends()
5+
@compile_workload begin
6+
# compile a dummy kernel to PTX to precompile the GPUCompiler pipeline.
7+
# this doesn't need a GPU — it only uses LLVM.
8+
let
9+
function _precompile_vadd(a)
10+
i = threadIdx().x
11+
@inbounds a[i] += 1f0
12+
return nothing
13+
end
1014

11-
llvm_support = llvm_compat()
12-
llvm_cap = maximum(filter(<=(v"7.5"), llvm_support.cap))
13-
llvm_ptx = maximum(filter(>=(v"6.2"), llvm_support.ptx))
15+
llvm_support = llvm_compat()
16+
llvm_cap = maximum(filter(<=(v"7.5"), llvm_support.cap))
17+
llvm_ptx = maximum(filter(>=(v"6.2"), llvm_support.ptx))
1418

15-
target = PTXCompilerTarget(; cap=llvm_cap, ptx=llvm_ptx, debuginfo=true)
16-
params = CUDACompilerParams(; cap=llvm_cap, ptx=llvm_ptx)
17-
config = CompilerConfig(target, params; kernel=true, name=nothing, always_inline=false)
19+
target = PTXCompilerTarget(; cap=llvm_cap, ptx=llvm_ptx, debuginfo=true)
20+
params = CUDACompilerParams(; cap=llvm_cap, ptx=llvm_ptx)
21+
config = CompilerConfig(target, params; kernel=true, name=nothing, always_inline=false)
1822

19-
tt = Tuple{CuDeviceArray{Float32,1,AS.Global}}
20-
source = methodinstance(typeof(_precompile_vadd), tt)
21-
job = CompilerJob(source, config)
23+
tt = Tuple{CuDeviceArray{Float32,1,AS.Global}}
24+
source = methodinstance(typeof(_precompile_vadd), tt)
25+
job = CompilerJob(source, config)
2226

23-
# On Julia < 1.12, GPU compilation during precompilation leaks foreign
24-
# MIs into native compilation, causing LLVM errors
25-
# (e.g. "Cannot select: intrinsic %llvm.nvvm.membar.sys").
26-
@static if VERSION >= v"1.12-"
27-
JuliaContext() do ctx
28-
GPUCompiler.compile(:asm, job)
27+
# On Julia < 1.12, GPU compilation during precompilation leaks foreign
28+
# MIs into native compilation, causing LLVM errors
29+
# (e.g. "Cannot select: intrinsic %llvm.nvvm.membar.sys").
30+
@static if VERSION >= v"1.12-"
31+
JuliaContext() do ctx
32+
GPUCompiler.compile(:asm, job)
33+
end
2934
end
3035
end
3136
end

CUDATools/src/CUDATools.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ using GPUCompiler
2323
using GPUCompiler: CompilerJob, methodinstance
2424
using LLVM
2525

26-
using CUDA_Compiler_jll: nvdisasm
26+
using CUDA_Compiler_jll: CUDA_Compiler_jll
2727

2828
import Preferences
2929
using Printf

CUDATools/src/reflection.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ function disassemble_cubin(io::IO, cubin::Vector{Cchar}; raw::Bool)
126126
write(cubin_io, cubin)
127127
flush(cubin_io)
128128

129-
cmd = `$(nvdisasm()) --print-code --print-line-info $cubin_path`
129+
cmd = `$(CUDA_Compiler_jll.nvdisasm()) --print-code --print-line-info $cubin_path`
130130
for line in readlines(cmd)
131131
if !raw
132132
# nvdisasm output is pretty verbose;

0 commit comments

Comments (0)