Skip to content

Commit 54401a5

Browse files
committed
Update on "Refactor use_triton_kernel to use nvfp4_quantize_kernel_choice"
Summary: This is to prepare for the addition of the flashinfer quantize kernel path in the next PR. Test Plan: python test/prototype/mx_formats/test_inference_workflow.py Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned]
2 parents d12d8b5 + b1f18fb commit 54401a5

2 files changed

Lines changed: 2 additions & 4 deletions

File tree

test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def test_inference_workflow_nvfp4(
188188
and quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
189189
):
190190
pytest.skip("unsupported configuration")
191-
if use_triton_kernel and not use_dynamic_per_tensor_scale:
191+
if quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK and not use_dynamic_per_tensor_scale:
192192
pytest.skip("unsupported configuration")
193193

194194
if use_inference_mode and (

torchao/prototype/mx_formats/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from torchao.prototype.mx_formats.config import (
2-
ScaleCalculationMode,
3-
MXLinearConfig,
4-
MXLinearRecipeName,
52
QuantizeToNVFP4KernelChoice,
3+
ScaleCalculationMode,
64
)
75

86
# Note: Prototype and subject to change

0 commit comments

Comments (0)