Skip to content

Commit e26dfb5

Browse files
committed
Precompile profile display.
1 parent be825a4 commit e26dfb5

2 files changed

Lines changed: 45 additions & 1 deletion

File tree

CUDATools/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1212
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1313
NVML = "611af6d1-644e-4c5d-bd58-854d7d1254b9"
1414
NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
15+
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1516
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
1617
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
1718
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@@ -31,6 +32,7 @@ GPUCompiler = "1.4"
3132
LLVM = "9.3.1"
3233
NVML = "=6.0.0"
3334
NVTX = "1"
35+
PrecompileTools = "1"
3436
Preferences = "1"
3537
PrettyTables = "3"
3638
Printf = "1"

CUDATools/src/precompile.jl

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,46 @@
1-
# @profile infrastructure
1+
# @profile infrastructure: GPU-dependent, so it can't execute during precompilation; only precompile() directives are issued for the signatures below
22
precompile(Tuple{typeof(Profile.detect_cupti)})
33
precompile(Tuple{typeof(Profile.profile_internally), Function})
44
precompile(Tuple{typeof(Profile.capture), CUPTI.ActivityConfig})
5+
6+
using PrecompileTools
7+
8+
@compile_workload begin
9+
# Run the @profile display path on a hand-built dummy result, so no GPU is needed.
10+
# The show method expects at least two cuCtxSynchronize entries in the host trace
11+
# to delimit the profiled region, with at least one event between them; the host table below provides both.
12+
dummy = Profile.ProfileResults(;
13+
host = (
14+
id = Int[1, 2, 3, 4],
15+
start = Float64[0.0, 0.001, 0.002, 0.010],
16+
stop = Float64[0.001, 0.002, 0.009, 0.011],
17+
name = String["cuCtxSynchronize", "cuCtxSynchronize",
18+
"cuLaunchKernel", "cuCtxSynchronize"],
19+
tid = Int[1, 1, 1, 1],
20+
),
21+
device = (
22+
id = Int[3],  # same id as the cuLaunchKernel host row; presumably how the two are correlated (TODO confirm)
23+
start = Float64[0.003],
24+
stop = Float64[0.008],
25+
name = String["kernel"],
26+
device = Int[0],
27+
context = Int[1],
28+
stream = Int[1],
29+
grid = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1,1,1)],
30+
block = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1,1,1)],
31+
registers = Union{Missing,Int64}[32],
32+
shared_mem = Union{Missing,@NamedTuple{static::Int64,dynamic::Int64}}[(static=0,dynamic=0)],
33+
local_mem = Union{Missing,@NamedTuple{thread::Int64,total::Int64}}[(thread=0,total=0)],
34+
size = Union{Missing,Int64}[missing],
35+
),
36+
nvtx = (
37+
id = Int[],
38+
start = Float64[],
39+
type = Symbol[],
40+
tid = Int[],
41+
name = Union{Missing,String}[],
42+
domain = Union{Missing,String}[],
43+
),
44+
)
45+
show(devnull, dummy)  # output goes to devnull; the call is made only so the display path gets compiled
46+
end

0 commit comments

Comments
 (0)