Skip to content

Commit ebfe81b

Browse files
brandonros and claude committed
refactor(cuda_builder): gate Blackwell default arch on llvm19 cargo feature
Replace the LLVM_CONFIG_19 env-var sniffing in `cuda_builder` with a proper `llvm19` cargo feature, addressing review feedback on #375.

- `nvvm` gains an `llvm19` feature; `NvvmArch`'s `#[default]` is moved off `Compute75` and onto `Compute100` via `cfg_attr` when it's enabled, so `NvvmArch::default()` returns the right answer for the active dialect.
- `cuda_builder` gains a matching `llvm19` feature that propagates to `nvvm/llvm19` and (when the optional dep is on) `rustc_codegen_nvvm/llvm19`. `CudaBuilder::new` goes back to plain `NvvmArch::default()`.
- The build script's nested `cargo build -p rustc_codegen_nvvm` now keys the `--features llvm19` flag off `cfg!(feature = "llvm19")` instead of the env var, so the toggle lives in one place.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3e35e85 commit ebfe81b

4 files changed

Lines changed: 26 additions & 26 deletions

File tree

crates/cuda_builder/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ default = []
1616
# HACK(see rust-gpu/spirv-builder): use `dep:` to avoid Cargo auto-creating a feature
1717
# with the dependency name. Consumers must explicitly opt-in to compiling the backend.
1818
rustc_codegen_nvvm = ["dep:rustc_codegen_nvvm"]
19+
# Build the backend against LLVM 19 instead of LLVM 7. Propagates to `nvvm` (which
20+
# uses it to flip the default `NvvmArch` to `Compute100`) and, when the optional
21+
# `rustc_codegen_nvvm` dep is also enabled, to `rustc_codegen_nvvm` itself. Even
22+
# when the optional dep is disabled, the build script's nested `cargo build -p
23+
# rustc_codegen_nvvm` reads `cfg(feature = "llvm19")` here and forwards it.
24+
llvm19 = ["nvvm/llvm19", "rustc_codegen_nvvm?/llvm19"]
1925

2026
[dependencies]
2127
rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm", optional = true }

crates/cuda_builder/src/lib.rs

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -196,21 +196,6 @@ pub struct CudaBuilder {
196196
pub final_module_path: Option<PathBuf>,
197197
}
198198

199-
/// Default arch for new `CudaBuilder`s.
200-
///
201-
/// When the backend is being built with LLVM 19 support (detected via the `LLVM_CONFIG_19`
202-
/// env var — the same signal `rustc_codegen_nvvm`'s build script uses), default to the
203-
/// lowest Blackwell compute capability (`Compute100`). Pre-Blackwell archs use the legacy
204-
/// LLVM 7 NVVM dialect, so pairing them with an LLVM 19 backend is never the right choice.
205-
/// Callers can still override via [`CudaBuilder::arch`].
206-
fn default_arch() -> NvvmArch {
207-
if env::var_os("LLVM_CONFIG_19").is_some() {
208-
NvvmArch::Compute100
209-
} else {
210-
NvvmArch::default()
211-
}
212-
}
213-
214199
impl CudaBuilder {
215200
pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self {
216201
Self {
@@ -219,7 +204,7 @@ impl CudaBuilder {
219204
ptx_file_copy_path: None,
220205
generate_line_info: true,
221206
nvvm_opts: true,
222-
arch: default_arch(),
207+
arch: NvvmArch::default(),
223208
ftz: false,
224209
fast_sqrt: false,
225210
fast_div: false,
@@ -370,7 +355,6 @@ impl CudaBuilder {
370355
/// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
371356
pub fn build(self) -> Result<PathBuf, CudaBuilderError> {
372357
println!("cargo:rerun-if-changed={}", self.path_to_crate.display());
373-
println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19");
374358
let path = invoke_rustc(&self)?;
375359
if let Some(copy_path) = self.ptx_file_copy_path {
376360
std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?;
@@ -572,11 +556,10 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {
572556
.arg(&target_dir)
573557
.current_dir(&workspace_dir);
574558

575-
// Propagate the llvm19 cargo feature to the nested build when the surrounding
576-
// shell is configured for LLVM 19 (signalled by LLVM_CONFIG_19). Without this
577-
// rustc_codegen_nvvm's build.rs defaults to the LLVM 7 path and falls through
578-
// to the prebuilt LLVM 7 download, which fails on Linux.
579-
if env::var_os("LLVM_CONFIG_19").is_some() {
559+
// Propagate the `llvm19` cargo feature to the nested backend build. Without this
560+
// `rustc_codegen_nvvm`'s build script falls through to the prebuilt LLVM 7
561+
// download, which the LLVM 19 codegen path can't link against.
562+
if cfg!(feature = "llvm19") {
580563
cmd.args(["--features", "llvm19"]);
581564
}
582565

crates/nvvm/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ description = "High level bindings to libnvvm"
88
repository = "https://github.com/Rust-GPU/rust-cuda"
99
readme = "../../README.md"
1010

11+
[features]
12+
default = []
13+
# Match the `llvm19` feature on `rustc_codegen_nvvm`. Currently only flips the
14+
# default `NvvmArch` to the lowest Blackwell capability, since the LLVM 7
15+
# bitcode dialect can't target `compute_100+` and the LLVM 19 dialect can't
16+
# target pre-Blackwell archs.
17+
llvm19 = []
18+
1119
[dependencies]
1220
cust_raw = { version = "0.11.3", path = "../cust_raw", default-features = false, features = ["nvvm"] }
1321
strum = { version = "0.27", features = ["derive"] }

crates/nvvm/src/lib.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,12 +310,13 @@ pub enum NvvmArch {
310310
Compute73,
311311
/// This default value of 7.5 corresponds to Turing and later devices. We default to this
312312
/// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
313-
/// supported by CUDA 12.x.
313+
/// supported by CUDA 12.x. Selected as the default only when the `llvm19` feature is off;
314+
/// the LLVM 19 NVVM dialect can't target pre-Blackwell archs.
314315
// WARNING: If you change the default, consider updating:
315316
// - The `--target-arch` values used for compiletests in `ci_linux.yml` and
316317
// `.github/workflows/ci_{linux,windows}.yml`.
317318
// - The CUDA versions used in `setup_cuda_environment` in `compiletests`.
318-
#[default]
319+
#[cfg_attr(not(feature = "llvm19"), default)]
319320
Compute75,
320321
Compute80,
321322
Compute86,
@@ -326,8 +327,10 @@ pub enum NvvmArch {
326327
Compute90a,
327328
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
328329
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
329-
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. This is also the default arch
330-
/// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set.
330+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. Selected as the default when
331+
/// the `llvm19` feature is enabled, since the LLVM 7 dialect can't target this and
332+
/// the LLVM 19 dialect can't target anything below it.
333+
#[cfg_attr(feature = "llvm19", default)]
331334
Compute100,
332335
Compute100f,
333336
Compute100a,

0 commit comments

Comments
 (0)