-
Notifications
You must be signed in to change notification settings - Fork 376
Expand file tree
/
Copy pathbench.yaml
More file actions
67 lines (61 loc) · 2.05 KB
/
bench.yaml
File metadata and controls
67 lines (61 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# # CCCL PR benchmark request config.
#
# ## Overview:
#
# This file is used to request benchmark comparisons in PR CI.
#
# This file must match ci/bench.template.yaml to merge.
# CI branch protections will fail if they differ. Reset before merging.
#
# To update the defaults (e.g. new GPU pools), modify both this file and
# ci/bench.template.yaml together in the same PR.
#
# !! Strongly consider appending the following to your **commit messages** while benchmarking. !!
# This prevents wasteful non-benchmark CI jobs if they are not needed.
#
# [bench-only]
#
# ## Quick start:
#
# 1. Add one or more benchmark regexes under benchmarks.filters.cub and/or
#    benchmarks.filters.python.
# 2. Enable at least one GPU by uncommenting or adding entries in benchmarks.gpus.
# 3. Push and inspect the dispatched benchmark jobs/artifacts.
# 4. Remove/reset benchmark-request edits before final merge.
benchmarks:
  # Benchmark filters grouped by project.
  filters:
    # CUB C++ benchmark filters (regex matched against ninja target names).
    cub:
      # Examples:
      # - '^cub\.bench\.for_each\.base'
      # - '^cub\.bench\.reduce\.(sum|min)\.'

    # Python benchmark filters (regex matched against paths under benchmarks/).
    python:
      # Examples:
      # - 'compute/reduce/sum\.py'
      # - 'compute/transform/.*\.py'
      # - 'coop/bench_warp_reduce\.py'

  # Select GPUs. These are limited and shared; be intentional and conservative.
  gpus:
    # - "t4"          # sm_75, 16 GB
    # - "rtx2080"     # sm_75, 8 GB
    # - "rtxa6000"    # sm_86, 48 GB
    # - "l4"          # sm_89, 24 GB
    # - "rtx4090"     # sm_89, 24 GB
    # - "h100"        # sm_90, 80 GB
    # - "rtxpro6000"  # sm_120

  # NOTE(review): the keys below are assumed to nest under `benchmarks`, like
  # `filters` and `gpus` -- confirm against ci/bench.template.yaml.

  # Extra .devcontainer/launch.sh -d args
  # launch_args: "--cuda 13.1 --host gcc14"
  launch_args: ""  # Latest nvcc + gcc

  # Advanced:
  base_ref: "origin/main"
  test_ref: "HEAD"
  arch: "native"
  # Folded block scalar (`>-`): the indented lines below are joined with single
  # spaces into one string with no trailing newline. Keep every argument
  # indented deeper than the `nvbench_args` key, or it falls out of the value.
  nvbench_args: >-
    --timeout 30
    --skip-time 15e-6
    --stopping-criterion entropy
    --throttle-threshold 90
    --throttle-recovery-delay 0.15
  nvbench_compare_args: ""