Skip to content

Commit 54401a5

Browse files
committed
Update on "Refactor use_triton_kernel to use nvfp4_quantize_kernel_choice"
Summary: This is to prepare for the addition of the flashinfer quantize kernel path in the next PR. Test Plan: python test/prototype/mx_formats/test_inference_workflow.py Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned]
2 parents d12d8b5 + b1f18fb commit 54401a5

2 files changed

Lines changed: 2 additions & 4 deletions

File tree

test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def test_inference_workflow_nvfp4(
188188
and quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK
189189
):
190190
pytest.skip("unsupported configuration")
191-
if use_triton_kernel and not use_dynamic_per_tensor_scale:
191+
if quantize_to_nvfp4_kernel_choice == QuantizeToNVFP4KernelChoice.MSLK and not use_dynamic_per_tensor_scale:
192192
pytest.skip("unsupported configuration")
193193

194194
if use_inference_mode and (

torchao/prototype/mx_formats/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from torchao.prototype.mx_formats.config import (
2-
ScaleCalculationMode,
3-
MXLinearConfig,
4-
MXLinearRecipeName,
52
QuantizeToNVFP4KernelChoice,
3+
ScaleCalculationMode,
64
)
75

86
# Note: Prototype and subject to change

0 commit comments

Comments (0)