|
1 | | -@compile_workload begin |
2 | | - # compile a dummy kernel to PTX to precompile the GPUCompiler pipeline. |
3 | | - # this doesn't need a GPU — it only uses LLVM. |
4 | | - let |
5 | | - function _precompile_vadd(a) |
6 | | - i = threadIdx().x |
7 | | - @inbounds a[i] += 1f0 |
8 | | - return nothing |
9 | | - end |
| 1 | +# `llvm_compat()` needs to initialize the NVPTX backend, so we only run the precompile |
| 2 | +# workload when LLVM provides that backend. This keeps the package loadable on systems |
| 3 | +# where the backend isn't available. |
| 4 | +if :NVPTX in LLVM.backends() |
| 5 | + @compile_workload begin |
| 6 | + # compile a dummy kernel to PTX to precompile the GPUCompiler pipeline. |
| 7 | + # this doesn't need a GPU — it only uses LLVM. |
| 8 | + let |
| 9 | + function _precompile_vadd(a) |
| 10 | + i = threadIdx().x |
| 11 | + @inbounds a[i] += 1f0 |
| 12 | + return nothing |
| 13 | + end |
10 | 14 |
|
11 | | - llvm_support = llvm_compat() |
12 | | - llvm_cap = maximum(filter(<=(v"7.5"), llvm_support.cap)) |
13 | | - llvm_ptx = maximum(filter(>=(v"6.2"), llvm_support.ptx)) |
| 15 | + llvm_support = llvm_compat() |
| 16 | + llvm_cap = maximum(filter(<=(v"7.5"), llvm_support.cap)) |
| 17 | + llvm_ptx = maximum(filter(>=(v"6.2"), llvm_support.ptx)) |
14 | 18 |
|
15 | | - target = PTXCompilerTarget(; cap=llvm_cap, ptx=llvm_ptx, debuginfo=true) |
16 | | - params = CUDACompilerParams(; cap=llvm_cap, ptx=llvm_ptx) |
17 | | - config = CompilerConfig(target, params; kernel=true, name=nothing, always_inline=false) |
| 19 | + target = PTXCompilerTarget(; cap=llvm_cap, ptx=llvm_ptx, debuginfo=true) |
| 20 | + params = CUDACompilerParams(; cap=llvm_cap, ptx=llvm_ptx) |
| 21 | + config = CompilerConfig(target, params; kernel=true, name=nothing, always_inline=false) |
18 | 22 |
|
19 | | - tt = Tuple{CuDeviceArray{Float32,1,AS.Global}} |
20 | | - source = methodinstance(typeof(_precompile_vadd), tt) |
21 | | - job = CompilerJob(source, config) |
| 23 | + tt = Tuple{CuDeviceArray{Float32,1,AS.Global}} |
| 24 | + source = methodinstance(typeof(_precompile_vadd), tt) |
| 25 | + job = CompilerJob(source, config) |
22 | 26 |
|
23 | | - # On Julia < 1.12, GPU compilation during precompilation leaks foreign |
24 | | - # MIs into native compilation, causing LLVM errors |
25 | | - # (e.g. "Cannot select: intrinsic %llvm.nvvm.membar.sys"). |
26 | | - @static if VERSION >= v"1.12-" |
27 | | - JuliaContext() do ctx |
28 | | - GPUCompiler.compile(:asm, job) |
| 27 | + # On Julia < 1.12, GPU compilation during precompilation leaks foreign |
| 28 | + # MIs into native compilation, causing LLVM errors |
| 29 | + # (e.g. "Cannot select: intrinsic %llvm.nvvm.membar.sys"). |
| 30 | + @static if VERSION >= v"1.12-" |
| 31 | + JuliaContext() do ctx |
| 32 | + GPUCompiler.compile(:asm, job) |
| 33 | + end |
29 | 34 | end |
30 | 35 | end |
31 | 36 | end |
|
0 commit comments