Skip to content

Commit 7911823

Browse files
andrewor14 and danielvegamyhre
authored and committed
Deprecate AQT and related classes (#4074)
**Summary:** Deprecate AffineQuantizedTensor, AQTTensorImpl, Layout, and all subclasses of the above in torchao.dtypes. We are planning to remove these classes in the future, so we deprecate them here in advance. ``` ● Here are all the classes that now have new deprecation warnings: Base classes (torchao/dtypes/utils.py): 1. Layout 2. PlainLayout 3. AQTTensorImpl torchao/dtypes/affine_quantized_tensor.py: 4. AffineQuantizedTensor torchao/dtypes/floatx/: 5. Float8Layout 6. Float8AQTTensorImpl 7. CutlassSemiSparseLayout 8. CutlassSemiSparseTensorImpl torchao/dtypes/uintx/: 9. TensorCoreTiledLayout 10. TensorCoreTiledAQTTensorImpl 11. SemiSparseLayout 12. SemiSparseAQTTensorImpl 13. Int4CPULayout 14. Int4CPUAQTTensorImpl 15. Int4XPULayout 16. Int4XPUAQTTensorImpl 17. QDQLayout 18. QDQTensorImpl 19. PlainAQTTensorImpl 20. PackedLinearInt8DynamicActivationIntxWeightLayout 21. PackedLinearInt8DynamicActivationIntxWeightAQTTensorImpl ``` More context here: #2752 **Test Plan:** Manual testing
1 parent 22430f4 commit 7911823

15 files changed

Lines changed: 138 additions & 16 deletions

benchmarks/prototype/moe_training/benchmark_scaled_grouped_mm_dq.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,17 @@ def run_experiment(
116116
requires_grad=True,
117117
).transpose(-2, -1)
118118

119-
offs = generate_jagged_offs(G, total_M, multiple_of=1)
119+
# Create config object from recipe
120+
if isinstance(config.recipe, Float8TrainingRecipe):
121+
quant_config = Float8TrainingOpConfig.from_recipe(config.recipe)
122+
alignment_size = 16 if args.aligned else 1
123+
# TODO: support pad_token_groups_for_grouped_mm option in Float8TrainingOpConfig
124+
else:
125+
quant_config = MXFP8TrainingOpConfig.from_recipe(config.recipe)
126+
quant_config.pad_token_groups_for_grouped_mm = not args.aligned
127+
alignment_size = 32 if args.aligned else 1
128+
129+
offs = generate_jagged_offs(G, total_M, multiple_of=alignment_size)
120130

121131
# fwd_bwd bf16 benchmark + profiling
122132
bf16_fwd_bwd_us = bench_fwd_bwd_microseconds(
@@ -138,12 +148,6 @@ def run_experiment(
138148
profile_name="bf16_profile",
139149
)
140150

141-
# Create config object from recipe
142-
if isinstance(config.recipe, Float8TrainingRecipe):
143-
quant_config = Float8TrainingOpConfig.from_recipe(config.recipe)
144-
else:
145-
quant_config = MXFP8TrainingOpConfig.from_recipe(config.recipe)
146-
147151
# fwd_bwd scaled benchmark + profiling
148152
scaled_fwd_bwd_us = bench_fwd_bwd_microseconds(
149153
_quantize_then_scaled_grouped_mm,
@@ -262,5 +266,11 @@ def main(args: argparse.Namespace):
262266
arg_parser = argparse.ArgumentParser()
263267
arg_parser.add_argument("--compile", action="store_true")
264268
arg_parser.add_argument("--profile", action="store_true")
269+
arg_parser.add_argument(
270+
"--aligned",
271+
action="store_true",
272+
help="If true, token group sizes are pre-aligned, to simulate flow with HybridEP or similar",
273+
)
274+
265275
args = arg_parser.parse_args()
266276
main(args)

test/prototype/moe_training/test_training.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
@pytest.mark.parametrize(
3838
"kernel_preference", [KernelPreference.AUTO, KernelPreference.EMULATED]
3939
)
40+
@pytest.mark.parametrize("token_groups_aligned", [False])
4041
@pytest.mark.parametrize(
4142
"recipe_config",
4243
[
@@ -74,6 +75,7 @@ def test_moe_training(
7475
target_fqns: list[str],
7576
compile: bool,
7677
kernel_preference: KernelPreference,
78+
token_groups_aligned: bool,
7779
recipe_config: dict,
7880
):
7981
(
@@ -110,6 +112,8 @@ def test_moe_training(
110112
pytest.skip(
111113
f"Skipping FP8 rowwise tests, only supported on compute capability 9.0 and found {torch.cuda.get_device_capability()}"
112114
)
115+
if not token_groups_aligned:
116+
pytest.skip("FP8 rowwise doesn't support per group token padding yet")
113117

114118
# MXFP8 hardware path requires SM100
115119
if recipe in (
@@ -123,7 +127,11 @@ def test_moe_training(
123127
f"Skipping MXFP8 hardware mode tests, only supported on compute capability 10.0 and found {torch.cuda.get_device_capability()}"
124128
)
125129

126-
set_token_group_alignment_size_m(1)
130+
alignment_size = 32 if isinstance(recipe, MXFP8TrainingRecipe) else 16
131+
if not token_groups_aligned:
132+
alignment_size = 1
133+
set_token_group_alignment_size_m(alignment_size)
134+
127135
model_args = MoEArgs(
128136
num_experts=8,
129137
num_shared_experts=1,
@@ -159,6 +167,11 @@ def moe_module_filter_fn(mod: nn.Module, cur_fqn: str) -> bool:
159167
else Float8TrainingOpConfig
160168
)
161169
config = config_cls.from_recipe(recipe)
170+
171+
# TODO: support pad_token_groups_for_grouped_mm in Float8TrainingOpConfig
172+
if isinstance(recipe, MXFP8TrainingRecipe) and not token_groups_aligned:
173+
config.pad_token_groups_for_grouped_mm = True
174+
162175
quantize_(model, config=config, filter_fn=moe_module_filter_fn)
163176

164177
# validate that only the experts were converted

torchao/dtypes/affine_quantized_tensor.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
import logging
99
import math
10+
import warnings
1011
from typing import TYPE_CHECKING, Optional, Tuple, Union
1112

1213
import torch
@@ -112,6 +113,9 @@ def __init__(
112113
if zero_point_domain is _DEFAULT_ZPD:
113114
zero_point_domain = ZeroPointDomain.INT
114115
torch._C._log_api_usage_once(str(type(self)))
116+
warnings.warn(
117+
"Deprecation: AffineQuantizedTensor is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
118+
)
115119
self.tensor_impl = tensor_impl
116120
self.block_size = block_size
117121
self.quant_min = quant_min

torchao/dtypes/floatx/cutlass_semi_sparse_layout.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
#
44
# This source code is licensed under the BSD 3-Clause license found in the
55
# LICENSE file in the root directory of this source tree.
6+
import warnings
67
from dataclasses import dataclass
78
from typing import Optional
89

@@ -42,6 +43,12 @@ def _same_metadata(
4243
class CutlassSemiSparseLayout(Layout):
4344
"""Layout class for float8 2:4 sparsity layout for affine quantized tensor, for cutlass kernel."""
4445

46+
def __post_init__(self):
47+
super().__post_init__()
48+
warnings.warn(
49+
"Deprecation: CutlassSemiSparseLayout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
50+
)
51+
4552
def pre_process(self, dense: torch.Tensor) -> torch.Tensor:
4653
# prune to 2:4 if not already
4754
from torchao.sparsity.utils import mask_creator
@@ -76,6 +83,9 @@ def __init__(
7683
scale: torch.Tensor,
7784
_layout: Layout,
7885
):
86+
warnings.warn(
87+
"Deprecation: CutlassSemiSparseTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
88+
)
7989
self.sparse = sparse
8090
self.meta = meta
8191
self.scale = scale

torchao/dtypes/floatx/float8_layout.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,12 @@ class Float8Layout(Layout):
6969

7070
mm_config: Optional[Float8MMConfig] = None
7171

72+
def __post_init__(self):
73+
super().__post_init__()
74+
warnings.warn(
75+
"Deprecation: Float8Layout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
76+
)
77+
7278

7379
_fallback_warning_shown = False
7480

@@ -110,6 +116,9 @@ def __init__(
110116
transposed: bool,
111117
_layout: Layout,
112118
):
119+
warnings.warn(
120+
"Deprecation: Float8AQTTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
121+
)
113122
warnings.warn(
114123
"Models quantized with version 1 of Float8DynamicActivationFloat8WeightConfig is deprecated and will no longer be supported in a future release, please upgrade torchao and quantize again, or download a newer torchao checkpoint, see https://github.com/pytorch/ao/issues/2649 for more details"
115124
)

torchao/dtypes/uintx/int4_cpu_layout.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,11 @@ class Int4CPULayout(Layout):
2929
Only for PyTorch version at least 2.6
3030
"""
3131

32-
pass
32+
def __post_init__(self):
33+
super().__post_init__()
34+
warnings.warn(
35+
"Deprecation: Int4CPULayout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
36+
)
3337

3438

3539
@register_layout(Int4CPULayout)
@@ -75,6 +79,9 @@ def __init__(
7579
transposed: bool,
7680
_layout: Layout,
7781
):
82+
warnings.warn(
83+
"Deprecation: Int4CPUAQTTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
84+
)
7885
warnings.warn(
7986
"Models quantized with version 1 of Int4WeightOnlyConfig is deprecated and will no longer be supported in a future release, please upgrade torchao and quantize again, or download a newer torchao checkpoint, see https://github.com/pytorch/ao/issues/2948 for more details"
8087
)

torchao/dtypes/uintx/int4_xpu_layout.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,11 @@ def _linear_fp_act_uint4_weight_int8_zero_impl(input_tensor, weight_tensor, bias
158158
class Int4XPULayout(Layout):
159159
"""Only for PyTorch version at least 2.7"""
160160

161-
pass
161+
def __post_init__(self):
162+
super().__post_init__()
163+
warnings.warn(
164+
"Deprecation: Int4XPULayout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
165+
)
162166

163167

164168
@register_layout(Int4XPULayout)
@@ -211,6 +215,9 @@ def __init__(
211215
scale: torch.Tensor = None,
212216
zero: torch.Tensor = None,
213217
):
218+
warnings.warn(
219+
"Deprecation: Int4XPUAQTTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
220+
)
214221
warnings.warn(
215222
"Models quantized with version 1 of Int4WeightOnlyConfig is deprecated and will no longer be supported in a future release, please upgrade torchao and quantize again, or download a newer torchao checkpoint, see https://github.com/pytorch/ao/issues/2948 for more details"
216223
)

torchao/dtypes/uintx/packed_linear_int8_dynamic_activation_intx_weight_layout.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,9 @@ def __init__(
7070
self,
7171
target: Union[str, Target] = "auto",
7272
):
73+
warnings.warn(
74+
"Deprecation: PackedLinearInt8DynamicActivationIntxWeightLayout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
75+
)
7376
warnings.warn(
7477
"Models quantized with version 1 of IntxWeightOnlyConfig/Int8DynamicActivationIntxWeightConfig are deprecated and will no longer be supported in a future release, please upgrade torchao and quantize again, or download a newer torchao checkpoint, see https://github.com/pytorch/ao/issues/2967 for more details"
7578
)
@@ -130,6 +133,9 @@ def __init__(
130133
packed_weight: torch.Tensor,
131134
_layout: Layout,
132135
):
136+
warnings.warn(
137+
"Deprecation: PackedLinearInt8DynamicActivationIntxWeightAQTTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
138+
)
133139
assert isinstance(_layout, PackedLinearInt8DynamicActivationIntxWeightLayout)
134140
self.packed_weight = packed_weight
135141
self._layout = _layout

torchao/dtypes/uintx/plain_layout.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
#
44
# This source code is licensed under the BSD 3-Clause license found in the
55
# LICENSE file in the root directory of this source tree.
6+
import warnings
67
from typing import Optional, Tuple
78

89
import torch
@@ -77,6 +78,10 @@ def __init__(
7778
zero_point: Optional[torch.Tensor],
7879
_layout: Layout,
7980
):
81+
if type(self) is PlainAQTTensorImpl:
82+
warnings.warn(
83+
"Deprecation: PlainAQTTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
84+
)
8085
self.int_data = int_data
8186
self.scale = scale
8287
self.zero_point = zero_point

torchao/dtypes/uintx/q_dq_layout.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,11 @@
4040

4141
@dataclass(frozen=True)
4242
class QDQLayout(Layout):
43-
pass
43+
def __post_init__(self):
44+
super().__post_init__()
45+
warnings.warn(
46+
"Deprecation: QDQLayout is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
47+
)
4448

4549

4650
def _same_metadata(self: "QDQTensorImpl", src: "QDQTensorImpl") -> bool:
@@ -96,6 +100,9 @@ def __init__(
96100
zero_point: Optional[torch.Tensor],
97101
_layout: Layout,
98102
):
103+
warnings.warn(
104+
"Deprecation: QDQTensorImpl is deprecated and will be removed in a future release of torchao, see https://github.com/pytorch/ao/issues/2752 for more details"
105+
)
99106
warnings.warn(
100107
"Models quantized with version 1 of IntxWeightOnlyConfig/Int8DynamicActivationIntxWeightConfig are deprecated and will no longer be supported in a future release, please upgrade torchao and quantize again, or download a newer torchao checkpoint, see https://github.com/pytorch/ao/issues/2967 for more details"
101108
)

0 commit comments

Comments (0)