|
1 | | -# @profile infrastructure |
| 1 | +# @profile infrastructure (GPU-dependent, can't execute during precompilation): precompile these entry points from explicit signatures instead
2 | 2 | precompile(Tuple{typeof(Profile.detect_cupti)})
3 | 3 | precompile(Tuple{typeof(Profile.profile_internally), Function})
4 | 4 | precompile(Tuple{typeof(Profile.capture), CUPTI.ActivityConfig})
| 5 | + |
| 6 | +using PrecompileTools |
| 7 | + |
| 8 | +@compile_workload begin
| 9 | + # Exercise the @profile display path by calling show(devnull, ...) on a hand-built ProfileResults (no GPU needed).
| 10 | + # The show method expects at least two cuCtxSynchronize entries in the host trace to delimit the
| 11 | + # profiled region, and at least one event between them; the host trace below has three syncs and one cuLaunchKernel.
| 12 | + dummy = Profile.ProfileResults(;
| 13 | + host = (
| 14 | + id = Int[1, 2, 3, 4],
| 15 | + start = Float64[0.0, 0.001, 0.002, 0.010],
| 16 | + stop = Float64[0.001, 0.002, 0.009, 0.011],
| 17 | + name = String["cuCtxSynchronize", "cuCtxSynchronize",
| 18 | + "cuLaunchKernel", "cuCtxSynchronize"],
| 19 | + tid = Int[1, 1, 1, 1],
| 20 | + ),
| 21 | + device = (
| 22 | + id = Int[3],
| 23 | + start = Float64[0.003],
| 24 | + stop = Float64[0.008],
| 25 | + name = String["kernel"],
| 26 | + device = Int[0],
| 27 | + context = Int[1],
| 28 | + stream = Int[1],
| 29 | + grid = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1,1,1)],
| 30 | + block = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1,1,1)],
| 31 | + registers = Union{Missing,Int64}[32],
| 32 | + shared_mem = Union{Missing,@NamedTuple{static::Int64,dynamic::Int64}}[(static=0,dynamic=0)],
| 33 | + local_mem = Union{Missing,@NamedTuple{thread::Int64,total::Int64}}[(thread=0,total=0)],
| 34 | + size = Union{Missing,Int64}[missing],
| 35 | + ),
| 36 | + nvtx = (
| 37 | + id = Int[],
| 38 | + start = Float64[],
| 39 | + type = Symbol[],
| 40 | + tid = Int[],
| 41 | + name = Union{Missing,String}[],
| 42 | + domain = Union{Missing,String}[],
| 43 | + ),
| 44 | + )
| 45 | + show(devnull, dummy)
| 46 | +end
0 commit comments