Skip to content

Blocksparse CuTensor contraction backend #1721

Open
kmp5VT wants to merge 5 commits into ITensor:main from
kmp5VT:kmp5/feature/cutensor
Open

Blocksparse CuTensor contraction backend #1721
kmp5VT wants to merge 5 commits into ITensor:main from
kmp5VT:kmp5/feature/cutensor

Conversation

@kmp5VT
Copy link
Copy Markdown
Collaborator

@kmp5VT kmp5VT commented Apr 3, 2026

No description provided.

@github-actions
Copy link
Copy Markdown
Contributor

github-actions bot commented Apr 3, 2026

Your PR requires formatting changes to meet the project's style guidelines.
Please run the ITensorFormatter to apply these changes.

Click here to view the suggested changes.
diff --git a/NDTensors/ext/NDTensorscuTENSORExt/contract.jl b/NDTensors/ext/NDTensorscuTENSORExt/contract.jl
index 64ffa26b..7ed580ed 100644
--- a/NDTensors/ext/NDTensorscuTENSORExt/contract.jl
+++ b/NDTensors/ext/NDTensorscuTENSORExt/contract.jl
@@ -1,7 +1,6 @@
 using Base: ReshapedArray
 using NDTensors.Expose: Exposed, expose, unexpose
-using NDTensors: NDTensors, BlockSparseTensor, DenseTensor, array,
-blockdims, data, eachnzblock, inds, nblocks, nzblocks
+using NDTensors: NDTensors, BlockSparseTensor, DenseTensor, array, blockdims, data, eachnzblock, inds, nblocks, nzblocks
 using cuTENSOR: cuTENSOR, CuArray, CuTensor
 
 # Handle cases that can't be handled by `cuTENSOR.jl`
@@ -32,10 +31,11 @@ function ITensor_to_cuTensorBS(T::BlockSparseTensor)
     nzblock_coords_t1 = [Int64.(x.data) for x in nzblocks(T)]
     block_per_mode_t1 = length.(block_extents_t1)
     is = [i for i in 1:ndims(T)]
-    return cuTENSOR.CuTensorBS(blocks_t1, block_per_mode_t1, block_extents_t1, nzblock_coords_t1, is);
+    return cuTENSOR.CuTensorBS(blocks_t1, block_per_mode_t1, block_extents_t1, nzblock_coords_t1, is)
 end
 
-function NDTensors._contract!(R::Exposed{<:CuArray, <:BlockSparseTensor},
+function NDTensors._contract!(
+        R::Exposed{<:CuArray, <:BlockSparseTensor},
         labelsR,
         tensor1::Exposed{<:CuArray, <:BlockSparseTensor},
         labelstensor1,
@@ -44,9 +44,9 @@ function NDTensors._contract!(R::Exposed{<:CuArray, <:BlockSparseTensor},
         grouped_contraction_plan,
         executor,
     )
-    N1 = ndims(unexpose(tensor1)) 
-    N2 = ndims(unexpose(tensor2)) 
-    NR = ndims(unexpose(R)) 
+    N1 = ndims(unexpose(tensor1))
+    N2 = ndims(unexpose(tensor2))
+    NR = ndims(unexpose(R))
     if NDTensors.using_CuTensorBS() && (N1 > 0) && (N2 > 0) && (NR > 0)
         # println("Using new function")
         cuR = ITensor_to_cuTensorBS(unexpose(R))
@@ -61,14 +61,14 @@ function NDTensors._contract!(R::Exposed{<:CuArray, <:BlockSparseTensor},
         return R
     else
         return NDTensors._contract!(
-        unexpose(R),
-        labelsR,
-        unexpose(tensor1),
-        labelstensor1,
-        unexpose(tensor2),
-        labelstensor2,
-        grouped_contraction_plan,
-        executor,
+            unexpose(R),
+            labelsR,
+            unexpose(tensor1),
+            labelstensor1,
+            unexpose(tensor2),
+            labelstensor2,
+            grouped_contraction_plan,
+            executor,
         )
     end
 end
diff --git a/NDTensors/src/NDTensors.jl b/NDTensors/src/NDTensors.jl
index e7a60688..919ec437 100644
--- a/NDTensors/src/NDTensors.jl
+++ b/NDTensors/src/NDTensors.jl
@@ -241,7 +241,6 @@ end
 
 function backend_octavian end
 
-
 _using_CuTensorBS = false
 
 using_CuTensorBS() = _using_CuTensorBS
diff --git a/NDTensors/src/blocksparse/contract_generic.jl b/NDTensors/src/blocksparse/contract_generic.jl
index 39b67fac..97afe393 100644
--- a/NDTensors/src/blocksparse/contract_generic.jl
+++ b/NDTensors/src/blocksparse/contract_generic.jl
@@ -71,19 +71,21 @@ function contract!(
     )
     return R
 end
-function _contract!(R::Exposed,
+function _contract!(
+        R::Exposed,
         labelsR,
         tensor1::Exposed,
         labelstensor1,
         tensor2::Exposed,
         labelstensor2,
         grouped_contraction_plan,
-        executor,
+        executor
     )
-    _contract!(unexpose(R), labelsR, 
-    unexpose(tensor1), labelstensor1,
-    unexpose(tensor2), labelstensor2,
-    grouped_contraction_plan,executor
+    return _contract!(
+        unexpose(R), labelsR,
+        unexpose(tensor1), labelstensor1,
+        unexpose(tensor2), labelstensor2,
+        grouped_contraction_plan, executor
     )
 end
 # Function barrier to improve type stability,

@mtfishman
Copy link
Copy Markdown
Member

Great to see this, thanks @kmp5VT. I guess this relies on JuliaGPU/CUDA.jl#3057? Once that is merged, would we just need to install the latest version of cuTENSOR/cuTENSOR.jl and the new backend in this PR "just works"?

@emstoudenmire
Copy link
Copy Markdown
Collaborator

Looks nice and very minimal. Thanks Karl!

@kmp5VT
Copy link
Copy Markdown
Collaborator Author

kmp5VT commented Apr 3, 2026

@mtfishman In NDTensors I also added an internal variable _cutensor_blocksparse which you can enable/disable during runtime. So if the variable is enabled, plus you have the changes in my CUDA branch, then it will automatically work. And from my tests, the new backend produces noticeable speedups over the previous blocksparse GPU backends

@mtfishman mtfishman changed the title from "Kmp5/feature/cutensor" to "Blocksparse CuTensor contraction backend" on Apr 14, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants