Skip to content

Commit 643bc80

Browse files
committed
Remove unified.py (Quantizer and TwoStepQuantizer ABCs)
Summary: Delete `torchao/quantization/unified.py`, which defined the `Quantizer` and `TwoStepQuantizer` abstract base classes. These were trivial ABCs that only declared method signatures (`quantize`, `prepare`, `convert`) which all subclasses already implement. Remove the base-class inheritance from all subclasses and clean up imports.

Test Plan:
pytest test/quantization/test_qat.py -x
pytest test/quantization/test_quant_api.py -x

ghstack-source-id: a5c1952
Pull Request resolved: #4264
1 parent c0708ce commit 643bc80

File tree

9 files changed

+10
-66
lines changed

9 files changed

+10
-66
lines changed

test/quantization/test_qat.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,6 @@
8585
quantize_affine,
8686
)
8787
from torchao.quantization.quantize_.workflows import Int4PackingFormat
88-
from torchao.quantization.unified import (
89-
TwoStepQuantizer,
90-
)
9188
from torchao.quantization.utils import (
9289
_get_per_token_block_size,
9390
compute_error,
@@ -751,7 +748,7 @@ def test_qat_4w_quantizer(self):
751748
ptq_state_dict[k], converted_state_dict[k], atol=0, rtol=0
752749
)
753750

754-
class _MyQATQuantizer(TwoStepQuantizer):
751+
class _MyQATQuantizer:
755752
"""
756753
Dummy quantizer that attaches a certain value to each nn.Linear's
757754
`_temp_quantizer_values` attribute.

test/quantization/test_quant_api.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@
4747
ModuleFqnToConfig,
4848
PerRow,
4949
PerTensor,
50-
Quantizer,
51-
TwoStepQuantizer,
5250
_replace_with_custom_fn_if_matches_filter,
5351
)
5452
from torchao.quantization.quant_primitives import MappingType
@@ -90,7 +88,7 @@ def capture_and_prepare(model, example_inputs):
9088
return m
9189

9290

93-
class XNNPackDynamicQuantizer(TwoStepQuantizer):
91+
class XNNPackDynamicQuantizer:
9492
def prepare(self, model: torch.nn.Module) -> torch.nn.Module:
9593
_replace_with_custom_fn_if_matches_filter(
9694
model,
@@ -110,7 +108,7 @@ def convert(self, model: torch.nn.Module) -> torch.nn.Module:
110108
return model
111109

112110

113-
class TorchCompileDynamicQuantizer(Quantizer):
111+
class TorchCompileDynamicQuantizer:
114112
def quantize(self, model: torch.nn.Module) -> torch.nn.Module:
115113
quantize_(model, Int8DynamicActivationInt8WeightConfig())
116114
return model

torchao/quantization/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
IntxUnpackedToInt8Tensor,
6161
)
6262
from .transform_module import register_quantize_module_handler
63-
from .unified import Quantizer, TwoStepQuantizer
6463
from .utils import (
6564
compute_error,
6665
)
@@ -124,7 +123,5 @@
124123
"Int4WeightOnlyQuantizer",
125124
"Int8DynActInt4WeightQuantizer",
126125
"Int8DynActInt4WeightLinear",
127-
"TwoStepQuantizer",
128-
"Quantizer",
129126
"Float8MMConfig",
130127
]

torchao/quantization/linear_quant_modules.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
MappingType,
2222
dequantize_affine,
2323
)
24-
from .unified import Quantizer
2524
from .utils import (
2625
group_quantize_tensor_symmetric,
2726
groupwise_affine_quantize_tensor,
@@ -232,7 +231,7 @@ def replace_linear_int4(
232231
)
233232

234233

235-
class Int4WeightOnlyQuantizer(Quantizer):
234+
class Int4WeightOnlyQuantizer:
236235
def __init__(
237236
self,
238237
groupsize: int = 256,
@@ -532,7 +531,7 @@ def replace_linear_8da4w(
532531
)
533532

534533

535-
class Int8DynActInt4WeightQuantizer(Quantizer):
534+
class Int8DynActInt4WeightQuantizer:
536535
def __init__(
537536
self,
538537
groupsize: int = 256,

torchao/quantization/qat/api.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import logging
99
from dataclasses import dataclass
1010
from enum import Enum
11-
from typing import Any, List, Optional, Tuple
11+
from typing import Any, Optional, Tuple
1212

1313
import torch
1414

@@ -17,7 +17,6 @@
1717
_QUANTIZE_CONFIG_HANDLER,
1818
register_quantize_module_handler,
1919
)
20-
from torchao.quantization.unified import TwoStepQuantizer
2120

2221
from .embedding import FakeQuantizedEmbedding
2322
from .fake_quantize_config import (
@@ -420,7 +419,7 @@ def _from_intx_quantization_aware_training_transform(
420419
return mod
421420

422421

423-
class ComposableQATQuantizer(TwoStepQuantizer):
422+
class ComposableQATQuantizer:
424423
"""
425424
Composable quantizer that users can use to apply multiple QAT quantizers easily.
426425
Quantizers will be applied in the order they are specified in the constructor.
@@ -440,7 +439,7 @@ class ComposableQATQuantizer(TwoStepQuantizer):
440439
model = my_quantizer.convert(model)
441440
"""
442441

443-
def __init__(self, quantizers: List[TwoStepQuantizer]):
442+
def __init__(self, quantizers: list):
444443
torch._C._log_api_usage_once("torchao.quantization.qat.ComposableQATQuantizer")
445444
self.quantizers = quantizers
446445

torchao/quantization/qat/embedding.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import torch.nn.functional as F
1111

1212
from torchao.quantization.quant_primitives import TorchAODType
13-
from torchao.quantization.unified import TwoStepQuantizer
1413
from torchao.quantization.utils import get_group_qparams_symmetric
1514

1615
from .fake_quantize_config import (
@@ -136,7 +135,7 @@ def from_embedding(
136135
# ======================================
137136

138137

139-
class Int4WeightOnlyEmbeddingQATQuantizer(TwoStepQuantizer):
138+
class Int4WeightOnlyEmbeddingQATQuantizer:
140139
"""
141140
Quantizer for performing QAT on a model, where embedding layers have
142141
int4 fake quantized grouped per channel weights.

torchao/quantization/qat/linear.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
TorchAODType,
2323
ZeroPointDomain,
2424
)
25-
from torchao.quantization.unified import TwoStepQuantizer
2625
from torchao.quantization.utils import get_group_qparams_symmetric
2726
from torchao.utils import _is_device
2827

@@ -181,7 +180,7 @@ def disable_linear_fake_quant(mod: torch.nn.Module):
181180
# ===========================
182181

183182

184-
class _LegacyQATQuantizer(TwoStepQuantizer):
183+
class _LegacyQATQuantizer:
185184
"""
186185
Base class for sharing common methods across legacy QAT quantizers.
187186
"""

torchao/quantization/quant_api.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,12 @@
103103
MappingType,
104104
quantize_affine,
105105
)
106-
from .unified import Quantizer, TwoStepQuantizer
107106

108107
logger = logging.getLogger(__name__)
109108

110109
# TODO: revisit this list?
111110
__all__ = [
112111
"swap_conv2d_1x1_to_linear",
113-
"Quantizer",
114-
"TwoStepQuantizer",
115112
"Int4WeightOnlyQuantizer",
116113
"_get_subclass_inserter",
117114
"quantize_",

torchao/quantization/unified.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

0 commit comments

Comments (0)