Skip to content

Commit 4efd83a

Browse files
authored
[pt2e] Skip linear+bn fusion when input is more than 2-D (#4242)
Linear always operates on the last dimension while BatchNorm1d normalizes along dim 1 (channels). These two coincide only for 2-D inputs (N, C). For higher-rank inputs like 3-D (N, C, L), fusing the BN parameters into Linear weights silently produces incorrect results because the scale/shift is applied along the wrong axis. Add an ndim check in _fuse_linear_bn_ that skips fusion and emits a warning when the linear input has more than 2 dimensions. Fixes #4116. Signed-off-by: Lidang-Jiang <lidangjiang@gmail.com>
1 parent 707bee8 commit 4efd83a

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

test/quantization/pt2e/test_quantize_pt2e.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import copy
1313
import unittest
14+
import warnings
1415

1516
import torch
1617
from torch import Tensor
@@ -212,6 +213,69 @@ def test_linear_bn_fusion(self):
212213
torch.ops.aten.batch_norm.default,
213214
)
214215

216+
def test_linear_bn_fusion_skipped_for_3d_input(self):
217+
"""Verify that Linear+BN fusion is skipped when input is >2-D.
218+
219+
When the linear input is 3-D (N, C, L), Linear operates on the last
220+
dim while BatchNorm1d normalizes along dim 1. Fusing them silently
221+
produces incorrect results. See https://github.com/pytorch/ao/issues/4116
222+
"""
223+
for bias in [True, False]:
224+
m = torch.nn.Sequential(
225+
torch.nn.Linear(3, 5, bias=bias),
226+
torch.nn.BatchNorm1d(5),
227+
)
228+
m.eval()
229+
# 3-D input: (batch=2, channels=5, length=3)
230+
example_inputs = (torch.randn(2, 5, 3),)
231+
ref_outputs = m(*example_inputs)
232+
traced_model = torch.export.export(m, example_inputs, strict=True).module()
233+
with warnings.catch_warnings(record=True) as w:
234+
warnings.simplefilter("always")
235+
prepared_model = prepare_pt2e(traced_model, XNNPACKQuantizer())
236+
# Should emit a warning about skipping fusion
237+
fusion_warnings = [
238+
x for x in w if "Not fusing linear+bn" in str(x.message)
239+
]
240+
self.assertGreater(
241+
len(fusion_warnings),
242+
0,
243+
"Expected a warning about skipping 3-D linear+bn fusion",
244+
)
245+
prepared_outputs = prepared_model(*example_inputs)
246+
# Outputs must match the reference (no silent corruption)
247+
torch.testing.assert_close(
248+
ref_outputs, prepared_outputs, atol=1e-5, rtol=1e-5
249+
)
250+
251+
def test_linear_bn_fusion_correct_for_2d_input(self):
252+
"""Verify that 2-D Linear+BN fusion still works and BN is removed."""
253+
for bias in [True, False]:
254+
for N, M in [(8, 16), (5, 5)]:
255+
m = torch.nn.Sequential(
256+
torch.nn.Linear(N, M, bias=bias),
257+
torch.nn.BatchNorm1d(M),
258+
)
259+
m.eval()
260+
example_inputs = (torch.randn(4, N),)
261+
ref_outputs = m(*example_inputs)
262+
traced_model = torch.export.export(
263+
m, example_inputs, strict=True
264+
).module()
265+
prepared_model = prepare_pt2e(traced_model, XNNPACKQuantizer())
266+
prepared_outputs = prepared_model(*example_inputs)
267+
torch.testing.assert_close(ref_outputs, prepared_outputs)
268+
# BN nodes should be removed after fusion
269+
for node in prepared_model.graph.nodes:
270+
self.assertNotEqual(
271+
node.target,
272+
torch.ops.aten._native_batch_norm_legit_no_training.default,
273+
)
274+
self.assertNotEqual(
275+
node.target,
276+
torch.ops.aten.batch_norm.default,
277+
)
278+
215279
def test_wo_annotate_conv_output_quantizer(self):
216280
# TODO: use OP_TO_ANNOTATOR
217281
class BackendAQuantizer(Quantizer):

torchao/quantization/pt2e/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,33 @@ def _fuse_linear_bn_(m: GraphModule) -> None:
970970
if not _is_linear_node(n):
971971
continue
972972
linear_node = n
973+
974+
# Linear+BN fusion is only valid when both layers operate on
975+
# the same dimension. Linear always acts on the last dim
976+
# while BatchNorm1d acts on the channel dim (dim 1). These
977+
# two coincide only when the linear input is 2-D (N, C).
978+
# For higher-rank inputs (e.g. 3-D (N, C, L)), BN normalizes
979+
# along dim 1 whereas Linear transforms the last dim, so
980+
# fusing would silently produce incorrect results.
981+
# See https://github.com/pytorch/ao/issues/4116
982+
linear_input_node = linear_node.args[0]
983+
if isinstance(linear_input_node, Node):
984+
linear_input_val = linear_input_node.meta.get("val")
985+
if (
986+
linear_input_val is not None
987+
and isinstance(linear_input_val, torch.Tensor)
988+
and linear_input_val.ndim > 2
989+
):
990+
warnings.warn(
991+
f"Not fusing linear+bn for node "
992+
f"'{linear_node.name}': the linear input "
993+
f"is {linear_input_val.ndim}-D so Linear "
994+
f"and BatchNorm operate on different "
995+
f"dimensions",
996+
stacklevel=1,
997+
)
998+
continue
999+
9731000
linear_weight_node = linear_node.args[1]
9741001
linear_bias_node = linear_node.args[2] if len(linear_node.args) > 2 else None
9751002
fold_bn_weights_into_linear_node(

0 commit comments

Comments
 (0)