Add parameter_name support to _int4_weight_only_transform (#3901)

jcaip · web-flow · commit d1fa9a2d86df · 2026-02-24T15:48:01.000-08:00
diff --git a/test/quantization/test_quant_api.py b/test/quantization/test_quant_api.py
@@ -40,6 +40,7 @@
     Int4WeightOnlyConfig,
     Int8DynamicActivationInt8WeightConfig,
     Int8DynamicActivationIntxWeightConfig,
+    Int8StaticActivationInt8WeightConfig,
     Int8WeightOnlyConfig,
     IntxWeightOnlyConfig,
     ModuleFqnToConfig,
@@ -1036,6 +1037,36 @@ def __init__(self):
         assert isinstance(m.nested.linear.weight, AffineQuantizedTensor)
         assert isinstance(m.linear1.weight, AffineQuantizedTensor)
 
+    def test_fqn_to_config_non_weight_param(self):
+        configs = [
+            Int4WeightOnlyConfig(group_size=128),
+            Int8WeightOnlyConfig(),
+            Int8StaticActivationInt8WeightConfig(),
+            Float8WeightOnlyConfig(),
+            Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()),
+        ]
+        for config in configs:
+            with self.subTest(config=type(config).__name__):
+                model = torch.nn.Sequential(
+                    torch.nn.Linear(128, 128).to(torch.bfloat16).cuda()
+                )
+                model[0].register_parameter(
+                    "custom_param",
+                    torch.nn.Parameter(
+                        torch.randn(128, 128, dtype=torch.bfloat16, device="cuda")
+                    ),
+                )
+                original_custom_param = model[0].custom_param
+                original_weight = model[0].weight
+                quant_config = FqnToConfig({"0.custom_param": config})
+                quantize_(model, quant_config, filter_fn=None)
+                assert model[0].custom_param is not original_custom_param, (
+                    f"custom_param should be quantized for {type(config).__name__}"
+                )
+                assert model[0].weight is original_weight, (
+                    f"weight should be unchanged for {type(config).__name__}"
+                )
+
     def test_fqn_config_module_config_and_fqn_config_both_specified(self):
         with self.assertRaises(ValueError):
             FqnToConfig(
diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py
@@ -821,18 +821,34 @@ def _int4_weight_only_quantize_tensor(weight, config):
 
 @register_quantize_module_handler(Int4WeightOnlyConfig)
 def _int4_weight_only_transform(
-    module: torch.nn.Module, config: Int4WeightOnlyConfig
+    module: torch.nn.Module,
+    config: Int4WeightOnlyConfig,
+    *,
+    parameter_name: str = "weight",
 ) -> torch.nn.Module:
     if config.set_inductor_config:
         torchao.quantization.utils.recommended_inductor_config_setter()
 
-    assert hasattr(module, "weight"), (
-        "applying int8 weight only quant requires module to have weight attribute"
-        + " but {module} does not have one"
+    assert hasattr(module, parameter_name), (
+        f"applying int4 weight only quant requires module to have {parameter_name} attribute"
+        + f" but {module} does not have one"
+    )
+    new_weight = _int4_weight_only_quantize_tensor(
+        getattr(module, parameter_name), config
+    )
+    setattr(
+        module,
+        parameter_name,
+        torch.nn.Parameter(new_weight, requires_grad=False),
+    )
+    module.extra_repr = types.MethodType(
+        partial(
+            _module_extra_repr,
+            original_extra_repr=module.extra_repr,
+            parameter_name=parameter_name,
+        ),
+        module,
     )
-    new_weight = _int4_weight_only_quantize_tensor(module.weight, config)
-    module.weight = torch.nn.Parameter(new_weight, requires_grad=False)
-    module.extra_repr = types.MethodType(_linear_extra_repr, module)
     return module