
Commit a1bb31f

Update base for Update on "Add support for flashinfer quantize kernel option for nvfp4"
Summary:

Added the flashinfer option for better performance on some of the workflows we are interested in. Also added a numerical equivalence test between the different nvfp4_quantize_kernel_choice options.

Test Plan:

pytest test/prototype/mx_formats/test_nvfp4_tensor.py -k test_kernel_preference_numerical_equivalence

We'll test speedup a bit later.

Reviewers:

Subscribers:

Tasks:

Tags:

[ghstack-poisoned]
2 parents a4e7c33 + 15df843 commit a1bb31f

2 files changed

Lines changed: 10 additions & 1 deletion
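The Summary above mentions a numerical equivalence test between the nvfp4 quantize kernel choices. Below is a minimal, self-contained sketch of that test pattern only, not the torchao implementation: the two fake_quantize_* functions are hypothetical stand-ins for two kernel code paths, and 6.0 is used as the FP4 E2M1 max value for illustration.

import torch


def fake_quantize_reference(x: torch.Tensor, block: int = 16) -> torch.Tensor:
    # Reference path: per-block absmax scaling, one block at a time.
    flat = x.reshape(-1)
    out = torch.empty_like(flat)
    for i in range(0, flat.numel(), block):
        chunk = flat[i : i + block]
        scale = chunk.abs().max().clamp(min=1e-12)
        out[i : i + block] = (chunk / scale * 6.0).round() / 6.0 * scale
    return out.reshape(x.shape)


def fake_quantize_vectorized(x: torch.Tensor, block: int = 16) -> torch.Tensor:
    # "Fast" path: the same math, vectorized over all blocks at once.
    xb = x.reshape(-1, block)
    scale = xb.abs().amax(dim=1, keepdim=True).clamp(min=1e-12)
    return ((xb / scale * 6.0).round() / 6.0 * scale).reshape(x.shape)


def test_kernel_choice_numerical_equivalence_sketch():
    torch.manual_seed(0)
    x = torch.randn(8, 64)
    ref = fake_quantize_reference(x)
    fast = fake_quantize_vectorized(x)
    # Different kernel choices should agree bit-for-bit on the same input.
    torch.testing.assert_close(fast, ref, atol=0.0, rtol=0.0)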


test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 4 additions & 1 deletion
@@ -188,7 +188,10 @@ def test_inference_workflow_nvfp4(
         and quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
     ):
         pytest.skip("unsupported configuration")
-    if quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK and not use_dynamic_per_tensor_scale:
+    if (
+        quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
+        and not use_dynamic_per_tensor_scale
+    ):
         pytest.skip("unsupported configuration")
 
     if use_inference_mode and (
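The hunk above only reformats an existing guard: in the parametrized nvfp4 inference test, unsupported parameter combinations are skipped at runtime instead of being removed from the parameter matrix. A minimal sketch of that pattern, with hypothetical parameter values standing in for the real torchao enums:

import pytest


@pytest.mark.parametrize("kernel_choice", ["torch", "mslk"])
@pytest.mark.parametrize("use_dynamic_per_tensor_scale", [False, True])
def test_unsupported_combo_is_skipped(kernel_choice, use_dynamic_per_tensor_scale):
    if (
        kernel_choice == "mslk"
        and not use_dynamic_per_tensor_scale
    ):
        pytest.skip("unsupported configuration")
    # The real test would run the quantized inference workflow here; this
    # sketch only demonstrates the skip-at-runtime pattern.
    assert kernel_choice in ("torch", "mslk")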

test/quantization/pt2e/test_x86inductor_fusion.py

Lines changed: 6 additions & 0 deletions
@@ -3093,13 +3093,19 @@ def matcher_check_fn():
 
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
+    @unittest.skipIf(
+        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
+    )
     def test_q_attention_block(self):
         for annotate_matmul in [True, False]:
             self._test_q_attention_block_helper(annotate_matmul=annotate_matmul)
 
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
     @skipIfNoFloat8Support
+    @unittest.skipIf(
+        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
+    )
     def test_fp8_q_attention_block(self):
         for annotate_matmul in [True, False]:
             self._test_q_attention_block_helper(
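Both hunks above gate the x86 inductor fusion attention tests on the installed torch version. A minimal sketch of that gating, assuming the packaging library is available; the local torch_version_at_least helper below is a hypothetical stand-in for the repo's own test utility:

import unittest

import torch
from packaging.version import parse


def torch_version_at_least(min_version: str) -> bool:
    # True when the installed torch is at or above min_version.
    return parse(torch.__version__) >= parse(min_version)


class VersionGatedTest(unittest.TestCase):
    @unittest.skipIf(
        torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
    )
    def test_runs_only_on_older_torch(self):
        # Executed only when torch is older than 2.11.0.dev.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()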
