
Commit a1bb31f

Update base for Update on "Add support for flashinfer quantize kernel option for nvfp4"
Summary:

Added the flashinfer option for better performance on some of the workflows we are interested in. Also added a numerical equivalence test between the different nvfp4_quantize_kernel_choice options.

Test Plan:

pytest test/prototype/mx_formats/test_nvfp4_tensor.py -k test_kernel_preference_numerical_equivalence

We'll test speedup a bit later.

Reviewers:

Subscribers:

Tasks:

Tags:

[ghstack-poisoned]
2 parents a4e7c33 + 15df843 commit a1bb31f

2 files changed

Lines changed: 10 additions & 1 deletion
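The Summary above mentions a numerical equivalence test between the nvfp4 quantize kernel choices. Below is a minimal, self-contained sketch of that test pattern only, not the torchao implementation: the two fake_quantize_* functions are hypothetical stand-ins for two kernel code paths, and 6.0 is used as the FP4 E2M1 max value for illustration.

import torch


def fake_quantize_reference(x: torch.Tensor, block: int = 16) -> torch.Tensor:
    # Reference path: per-block absmax scaling, one block at a time.
    flat = x.reshape(-1)
    out = torch.empty_like(flat)
    for i in range(0, flat.numel(), block):
        chunk = flat[i : i + block]
        scale = chunk.abs().max().clamp(min=1e-12)
        out[i : i + block] = (chunk / scale * 6.0).round() / 6.0 * scale
    return out.reshape(x.shape)


def fake_quantize_vectorized(x: torch.Tensor, block: int = 16) -> torch.Tensor:
    # "Fast" path: the same math, vectorized over all blocks at once.
    xb = x.reshape(-1, block)
    scale = xb.abs().amax(dim=1, keepdim=True).clamp(min=1e-12)
    return ((xb / scale * 6.0).round() / 6.0 * scale).reshape(x.shape)


def test_kernel_choice_numerical_equivalence_sketch():
    torch.manual_seed(0)
    x = torch.randn(8, 64)
    ref = fake_quantize_reference(x)
    fast = fake_quantize_vectorized(x)
    # Different kernel choices should agree bit-for-bit on the same input.
    torch.testing.assert_close(fast, ref, atol=0.0, rtol=0.0)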


test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 4 additions & 1 deletion
@@ -188,7 +188,10 @@ def test_inference_workflow_nvfp4(
         and quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
     ):
         pytest.skip("unsupported configuration")
-    if quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK and not use_dynamic_per_tensor_scale:
+    if (
+        quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
+        and not use_dynamic_per_tensor_scale
+    ):
         pytest.skip("unsupported configuration")
 
     if use_inference_mode and (
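The hunk above only reformats an existing guard: in the parametrized nvfp4 inference test, unsupported parameter combinations are skipped at runtime instead of being removed from the parameter matrix. A minimal sketch of that pattern, with hypothetical parameter values standing in for the real torchao enums:

import pytest


@pytest.mark.parametrize("kernel_choice", ["torch", "mslk"])
@pytest.mark.parametrize("use_dynamic_per_tensor_scale", [False, True])
def test_unsupported_combo_is_skipped(kernel_choice, use_dynamic_per_tensor_scale):
    if (
        kernel_choice == "mslk"
        and not use_dynamic_per_tensor_scale
    ):
        pytest.skip("unsupported configuration")
    # The real test would run the quantized inference workflow here; this
    # sketch only demonstrates the skip-at-runtime pattern.
    assert kernel_choice in ("torch", "mslk")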

test/quantization/pt2e/test_x86inductor_fusion.py

Lines changed: 6 additions & 0 deletions
@@ -3093,13 +3093,19 @@ def matcher_check_fn():
 
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
+    @unittest.skipIf(
+        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
+    )
     def test_q_attention_block(self):
         for annotate_matmul in [True, False]:
             self._test_q_attention_block_helper(annotate_matmul=annotate_matmul)
 
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
     @skipIfNoFloat8Support
+    @unittest.skipIf(
+        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
+    )
     def test_fp8_q_attention_block(self):
         for annotate_matmul in [True, False]:
             self._test_q_attention_block_helper(
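Both hunks above gate the x86 inductor fusion attention tests on the installed torch version. A minimal sketch of that gating, assuming the packaging library is available; the local torch_version_at_least helper below is a hypothetical stand-in for the repo's own test utility:

import unittest

import torch
from packaging.version import parse


def torch_version_at_least(min_version: str) -> bool:
    # True when the installed torch is at or above min_version.
    return parse(torch.__version__) >= parse(min_version)


class VersionGatedTest(unittest.TestCase):
    @unittest.skipIf(
        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
    )
    def test_runs_only_on_older_torch(self):
        # Executed only when torch is older than 2.11.0.dev.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()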
