Skip to content

Commit 711b2cd

Browse files
authored
Merge pull request #54 from NVIDIA/release/v0.8.0
release: nvmath-python-0.8.0
2 parents b92f2e6 + b8450e4 commit 711b2cd

File tree

681 files changed

+20574
-9476
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

681 files changed

+20574
-9476
lines changed

.markdownlint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
1+
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
1+
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

builder/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
1+
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
22
#
33
# SPDX-License-Identifier: Apache-2.0

builder/utils.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
1+
# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

55
import os
66
import sys
7+
import warnings
78

89
from setuptools.command.build_ext import build_ext as _build_ext
910

@@ -18,9 +19,9 @@ def detect_cuda_paths():
1819
# headers, and in the wheel case they are scattered in two wheels. When build
1920
# isolation is on, the build prefix is added to sys.path, but this is the only
2021
# implementation detail that we rely on.
22+
# TODO: move to cuda.pathfinder.
2123
potential_build_prefixes = (
22-
[os.path.join(p, "nvidia/cuda_runtime") for p in sys.path]
23-
+ [os.path.join(p, "nvidia/cuda_nvcc") for p in sys.path]
24+
[os.path.join(p, "nvidia/cu13") for p in sys.path]
2425
# internal/bindings depends on cuda_bindings cydriver,
2526
# which introduces dependency on cudaProfiler.h
2627
+ [os.path.join(p, "nvidia/cuda_profiler_api") for p in sys.path]
@@ -36,7 +37,12 @@ def check_path(header):
3637
cuda_paths.append(prefix)
3738
break
3839
else:
39-
raise RuntimeError(f"{header} not found")
40+
searched_paths = "\n ".join(potential_build_prefixes)
41+
warnings.warn(
42+
f"include/{header} not found in any of these paths:\n "
43+
f"{searched_paths}\n"
44+
"Compilation of nvmath-python may fail. Set CUDA_PATH to suppress this warning."
45+
)
4046

4147
check_path("cuda.h")
4248
check_path("crt/host_defines.h")

docs/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ clean:
1515
rm -rf $(BUILDDIR)
1616
rm -rf ${SOURCEDIR}/_xml/
1717
rm -rf ${SOURCEDIR}/bindings/generated/
18-
rm -rf ${SOURCEDIR}/fft/generated
19-
rm -rf ${SOURCEDIR}/linalg/generated
18+
rm -rf ${SOURCEDIR}/host-apis/**/generated
2019
rm -rf ${SOURCEDIR}/device-apis/generated
20+
rm -rf ${SOURCEDIR}/distributed-apis/**/generated
2121

2222
html: Makefile
2323
@echo BUILDDIR=${BUILDDIR}

docs/sphinx/_static/nvmath_override.css

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,34 @@
55
.bd-page-width {
66
max-width: 100rem; /* default is 88rem */
77
}
8+
9+
/* Experimental API styling */
10+
/* Add left border to the entire method/function/class marked as experimental */
11+
/* This covers both the signature and docstring */
12+
/* Uses theme's attention color variables that automatically adapt to light/dark mode */
13+
dl.py.method.experimental,
14+
dl.py.function.experimental,
15+
dl.py.class.experimental {
16+
border-left: 6px solid var(--pst-color-attention-bg);
17+
padding-left: 10px;
18+
}
19+
20+
/* Style the experimental marker box */
21+
/* This is a simple container created by .. experimental:: directive */
22+
/* Using CSS variables from the theme's attention/warning admonition style */
23+
/* These variables automatically change with the theme switcher */
24+
.experimental-marker {
25+
background-color: var(--pst-color-attention-bg);
26+
padding: 8px 12px;
27+
margin: 12px 0;
28+
border-radius: 4px;
29+
}
30+
31+
/* Style the paragraph inside the container */
32+
.experimental-marker > p {
33+
color: var(--pst-color-attention-text);
34+
font-weight: 700;
35+
margin: 0;
36+
padding-left: 8px;
37+
font-size: 0.95em;
38+
}

docs/sphinx/_static/switcher.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
"version": "latest",
44
"url": "https://docs.nvidia.com/cuda/nvmath-python/latest"
55
},
6+
{
7+
"version": "0.8.0",
8+
"url": "https://docs.nvidia.com/cuda/nvmath-python/0.8.0"
9+
},
610
{
711
"version": "0.7.0",
812
"url": "https://docs.nvidia.com/cuda/nvmath-python/0.7.0"

docs/sphinx/bindings/cublasMp.rst

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,14 @@ Enums and constants
1212
.. autosummary::
1313
:toctree: generated/
1414

15-
ComputeType
16-
cuBLASMpError
15+
EmulationStrategy
1716
GridLayout
1817
MatmulAlgoType
1918
MatmulDescriptorAttribute
2019
MatmulEpilogue
2120
MatmulMatrixScale
22-
Operation
2321
Status
22+
cuBLASMpError
2423

2524
Functions
2625
*********
@@ -31,15 +30,34 @@ Functions
3130
create
3231
destroy
3332
stream_set
33+
stream_get
3434
get_version
35+
set_emulation_strategy
36+
get_emulation_strategy
3537
grid_create
3638
grid_destroy
3739
matrix_descriptor_create
40+
matrix_descriptor_init
3841
matrix_descriptor_destroy
42+
numroc
3943
matmul_descriptor_create
44+
matmul_descriptor_init
4045
matmul_descriptor_destroy
4146
matmul_descriptor_attribute_set
4247
matmul_descriptor_attribute_get
4348
matmul_buffer_size
4449
matmul
45-
numroc
50+
geadd_buffer_size
51+
geadd
52+
gemm_buffer_size
53+
gemm
54+
gemr2d_buffer_size
55+
gemr2d
56+
syrk_buffer_size
57+
syrk
58+
tradd_buffer_size
59+
tradd
60+
trmr2d_buffer_size
61+
trmr2d
62+
trsm_buffer_size
63+
trsm

docs/sphinx/bindings/nvpl.blas.rst

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,20 @@ Functions
2929
cgemm_batch
3030
cgemm_batch_strided
3131
chemm
32-
chemm_batch_strided
3332
cher2k
3433
cherk
3534
csymm
36-
csymm_batch_strided
3735
csyr2k
3836
csyrk
3937
ctrmm
40-
ctrmm_batch_strided
4138
ctrsm
4239
dgemm
4340
dgemm_batch
4441
dgemm_batch_strided
4542
dsymm
46-
dsymm_batch_strided
4743
dsyr2k
4844
dsyrk
4945
dtrmm
50-
dtrmm_batch_strided
5146
dtrsm
5247
get_max_threads
5348
get_version
@@ -57,23 +52,18 @@ Functions
5752
sgemm_batch
5853
sgemm_batch_strided
5954
ssymm
60-
ssymm_batch_strided
6155
ssyr2k
6256
ssyrk
6357
strmm
64-
strmm_batch_strided
6558
strsm
6659
zgemm
6760
zgemm_batch
6861
zgemm_batch_strided
6962
zhemm
70-
zhemm_batch_strided
7163
zher2k
7264
zherk
7365
zsymm
74-
zsymm_batch_strided
7566
zsyr2k
7667
zsyrk
7768
ztrmm
78-
ztrmm_batch_strided
7969
ztrsm

docs/sphinx/conf.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
from sphinx.writers.html import HTMLTranslator
2828
from docutils.transforms import Transform
29+
from docutils.parsers.rst import Directive
2930
import docutils.nodes as nodes
3031

3132
import numpy as np
@@ -215,7 +216,6 @@ def autodoc_process_docstring(app, what, name, obj, options, lines):
215216
docs = {
216217
"abbreviation": MM_QUALIFIERS_DOCUMENTATION["abbreviation"],
217218
"conjugate": MM_QUALIFIERS_DOCUMENTATION["conjugate"],
218-
"transpose": MM_QUALIFIERS_DOCUMENTATION["transpose"],
219219
"uplo": MM_QUALIFIERS_DOCUMENTATION["uplo"],
220220
"diag": MM_QUALIFIERS_DOCUMENTATION["diag"],
221221
"incx": MM_QUALIFIERS_DOCUMENTATION["incx"],
@@ -312,6 +312,72 @@ def default_departure(self, node):
312312
default_priority = 800
313313

314314

315+
class ExperimentalDirective(Directive):
    """
    Custom directive for marking experimental APIs.

    Usage in docstrings:
        .. experimental:: method
        .. experimental:: function
        .. experimental:: class
        .. experimental:: parameter

    The single required argument names the kind of API being marked. The
    directive emits a container node carrying the ``experimental-marker`` CSS
    class, wrapping one paragraph with the standard experimental notice.

    Note: this marker is detected by the mark_experimental_apis function,
    which adds the 'experimental' CSS class to the method/function/class.
    """

    # API kinds accepted as the directive argument.
    _API_TYPES = ("method", "function", "class", "parameter")

    # Directive does not expect indented content below it (e.g., no text block)
    has_content = False
    # Requires exactly one argument: the API type (see above)
    required_arguments = 1
    # No optional arguments allowed
    optional_arguments = 0

    def run(self):
        """
        Build the marker node for this directive.

        Returns:
            A one-element list holding a ``nodes.container`` with the
            ``experimental-marker`` class.

        Raises:
            DirectiveError: if the argument is not one of the accepted API
                types. Raised via ``self.error`` so docutils reports it as an
                error at the directive's source location.
        """
        # The argument is required (required_arguments = 1); there is no default.
        api_type = self.arguments[0]
        # Explicit check instead of `assert`: assertions vanish under `python -O`.
        if api_type not in self._API_TYPES:
            raise self.error(
                f'Invalid API type "{api_type}" for the "{self.name}" directive; '
                f"expected one of: {', '.join(self._API_TYPES)}."
            )

        text = f"This {api_type} is experimental and potentially subject to future changes."
        # Create a simple container div to inline the experimental text
        container = nodes.container()
        container += nodes.paragraph("", text)
        container["classes"].append("experimental-marker")
        return [container]
348+
349+
350+
def mark_experimental_apis(app, doctree, docname):
    """
    Tag API descriptions that carry an experimental marker.

    For every ``desc`` node in ``doctree``, look at the direct children of its
    first ``desc_content`` child. If any of them (nested ``desc`` nodes
    excluded) has the ``experimental-marker`` class, append the
    ``experimental`` class to the ``desc`` node itself.

    Connected to the 'doctree-resolved' event; ``app`` and ``docname`` are
    part of that handler signature and are not used here.
    """
    from sphinx import addnodes

    for described in doctree.traverse(addnodes.desc):
        # First desc_content child holds the docstring body; None if absent.
        body = next(
            (kid for kid in described.children if isinstance(kid, addnodes.desc_content)),
            None,
        )
        if body is None:
            continue

        # Only direct children count; nested desc nodes are handled by their
        # own iteration of the outer loop, so they are skipped here.
        has_marker = any(
            "experimental-marker" in kid.get("classes", [])
            for kid in body.children
            if not isinstance(kid, addnodes.desc)
        )
        if has_marker:
            described["classes"].append("experimental")
379+
380+
315381
class NotebookHandler:
316382
def __init__(self):
317383
self.tmpdir = tempfile.mkdtemp()
@@ -347,8 +413,18 @@ def remove_notebook_copyright(self, app, docname, content):
347413
def setup(app):
348414
fixup_internal_alias()
349415
app.add_css_file("nvmath_override.css")
416+
app.add_directive("experimental", ExperimentalDirective)
350417
app.connect("autodoc-process-docstring", autodoc_process_docstring)
351418
app.connect("source-read", lambda *args, **kwargs: notebook_handler.remove_notebook_copyright(*args, **kwargs))
419+
420+
# Connect the experimental API marker to the doctree-resolved event.
421+
# doctree-resolved fires after the doc tree is fully built and
422+
# cross-references are resolved, allowing us to safely traverse and
423+
# modify nodes before HTML generation.
424+
# This detects methods/functions/classes with the "experimental-marker"
425+
# and adds the 'experimental' CSS class for styling (orange border).
426+
app.connect("doctree-resolved", mark_experimental_apis)
427+
352428
app.set_translator("html", DotBreakHtmlTranslator)
353429
app.add_autodocumenter(PatchedEnumDocumenter, override=True)
354430
app.add_post_transform(UnqualifiedTitlesTransform)
@@ -374,7 +450,7 @@ def fixup_internal_alias():
374450
"nvmath.device.FFT": "nvmath.device.FFT-class",
375451
"nvmath.device.Matmul": "nvmath.device.Matmul-class",
376452
"nvmath.linalg.advanced.Matmul": "nvmath.linalg.advanced.Matmul-class",
377-
"nvmath.linalg.generic.Matmul": "nvmath.linalg.generic.Matmul-class",
453+
"nvmath.linalg.Matmul": "nvmath.linalg.Matmul-class",
378454
"nvmath.distributed.linalg.advanced.Matmul": "nvmath.distributed.linalg.advanced.Matmul-class",
379455
"nvmath.distributed.fft.FFT": "nvmath.distributed.fft.FFT-class",
380456
"nvmath.distributed.reshape.Reshape": "nvmath.distributed.reshape.Reshape-class",

0 commit comments

Comments
 (0)