Skip to content

Commit 8696dc1

Browse files
committed
Update base for Update on "Refactor use_triton_kernel to use nvfp4_quantize_kernel_choice"
Summary: This is to prepare for the addition of the flashinfer quantize kernel path in the next PR. Test Plan: python test/prototype/mx_formats/test_inference_workflow.py Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned]
2 parents 3d41297 + 15df843 commit 8696dc1

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

test/quantization/pt2e/test_x86inductor_fusion.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3093,13 +3093,19 @@ def matcher_check_fn():
30933093

30943094
@skipIfNoDynamoSupport
30953095
@skipIfNoONEDNN
3096+
@unittest.skipIf(
3097+
torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
3098+
)
30963099
def test_q_attention_block(self):
30973100
for annotate_matmul in [True, False]:
30983101
self._test_q_attention_block_helper(annotate_matmul=annotate_matmul)
30993102

31003103
@skipIfNoDynamoSupport
31013104
@skipIfNoONEDNN
31023105
@skipIfNoFloat8Support
3106+
@unittest.skipIf(
3107+
torch_version_at_least("2.11.0.dev"), "Doesn't work with torch 2.11.0.dev+"
3108+
)
31033109
def test_fp8_q_attention_block(self):
31043110
for annotate_matmul in [True, False]:
31053111
self._test_q_attention_block_helper(

0 commit comments

Comments (0)