|
| 1 | +@compile_workload begin |
| 2 | + # compile a dummy kernel to PTX to precompile the GPUCompiler pipeline. |
| 3 | + # this doesn't need a GPU — it only uses LLVM. |
| 4 | + let |
| 5 | + function _precompile_vadd(a) |
| 6 | + i = threadIdx().x |
| 7 | + @inbounds a[i] += 1f0 |
| 8 | + return nothing |
| 9 | + end |
1 | 10 |
|
2 | | -# array |
3 | | -precompile(CuArray, (Vector{Int},)) |
| 11 | + llvm_support = llvm_compat() |
| 12 | + llvm_cap = maximum(filter(<=(v"7.5"), llvm_support.cap)) |
| 13 | + llvm_ptx = maximum(filter(>=(v"6.2"), llvm_support.ptx)) |
4 | 14 |
|
5 | | -# compilation |
6 | | -precompile(compiler_cache, (CuContext,)) |
7 | | -#precompile(compiler_config, (CuDevice,)) |
8 | | -precompile(compile, (CompilerJob,)) |
9 | | -precompile(link, (CompilerJob,NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}})) |
10 | | -precompile(create_exceptions!, (CuModule,)) |
11 | | -precompile(run_and_collect, (Cmd,)) |
| 15 | + target = PTXCompilerTarget(; cap=llvm_cap, ptx=llvm_ptx, debuginfo=true) |
| 16 | + params = CUDACompilerParams(; cap=llvm_cap, ptx=llvm_ptx) |
| 17 | + config = CompilerConfig(target, params; kernel=true, name=nothing, always_inline=false) |
12 | 18 |
|
13 | | -# launch |
14 | | -precompile(cudaconvert, (Function,)) |
15 | | -precompile(Core.kwfunc(cudacall), (NamedTuple{(:threads, :blocks), Tuple{Int64, Int64}},typeof(cudacall),CuFunction,Type{Tuple{}})) |
16 | | -precompile(Core.kwfunc(launch), (NamedTuple{(:threads, :blocks), Tuple{Int64, Int64}},typeof(launch),CuFunction)) |
| 19 | + tt = Tuple{CuDeviceArray{Float32,1,AS.Global}} |
| 20 | + source = methodinstance(typeof(_precompile_vadd), tt) |
| 21 | + job = CompilerJob(source, config) |
| 22 | + |
| 23 | + # On Julia < 1.12, GPU compilation during precompilation leaks foreign |
| 24 | + # method instances (MIs) into native compilation, causing LLVM errors |
| 25 | + # (e.g. "Cannot select: intrinsic %llvm.nvvm.membar.sys"), so the compile step below only runs on 1.12+. |
| 26 | + @static if VERSION >= v"1.12-" |
| 27 | + JuliaContext() do ctx |
| 28 | + GPUCompiler.compile(:asm, job) |
| 29 | + end |
| 30 | + end |
| 31 | + end |
| 32 | +end |
| 33 | + |
| 34 | +# kernel launch infrastructure |
| 35 | +precompile(Tuple{typeof(cufunction), typeof(identity), Type{Tuple{Nothing}}}) |
| 36 | +precompile(Tuple{typeof(link), CompilerJob, NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}}}) |
| 37 | + |
| 38 | +# GPUCompiler compilation pipeline (specialized for CUDACore's compile/link) |
| 39 | +precompile(Tuple{typeof(GPUCompiler.actual_compilation), |
| 40 | + Dict{Any, CuFunction}, Core.MethodInstance, UInt64, |
| 41 | + CUDACompilerConfig, typeof(compile), typeof(link)}) |
| 42 | + |
| 43 | +# scalar reference (used by cuBLAS for alpha/beta parameters) |
| 44 | +precompile(Tuple{Type{CuRefValue{Float32}}, Float32}) |
| 45 | +precompile(Tuple{typeof(pool_free), Managed{DeviceMemory}}) |
0 commit comments