Commit 136cacb

Remove tensor parallel test for v1 of Int8DynamicActivationInt8WeightConfig (#4169)
Summary:

CI is failing:

```
The error occurs because torch.compile with DTensor generates wrapper code that tries to access tensor_impl on the outer LinearActivationQuantizedTensor instead of the inner AffineQuantizedTensor. This is a torch.compile subclass-handling issue with the deeply nested subclass hierarchy in version 1: DTensor(LinearActivationQuantizedTensor(AffineQuantizedTensor(...))). The test uses Int8DynamicActivationInt8WeightConfig() with the default version=1, which creates this nested structure. Version 2 uses Int8Tensor, which avoids the problematic nesting.
```

v2 doesn't support the view op, so it won't work either. We haven't heard of a real use case for this: it is used in some demos, but we have not heard from actual users yet. Therefore we delete the test in this PR; we'll be deleting the v1 configs in the future as well.

Test Plan: CI

Reviewers:

Subscribers:

Tasks:

Tags:

[ghstack-poisoned]
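The attribute-lookup failure described above can be illustrated with a plain-Python mock (no torch, no torchao; the class names and the `tensor_impl` / `original_weight_tensor` attributes are stand-ins mirroring the real subclasses, not the actual implementations). The generated wrapper code looks for `tensor_impl` one level too high in the nesting:

```python
# Hypothetical sketch of the v1 nesting:
# DTensor(LinearActivationQuantizedTensor(AffineQuantizedTensor(...)))

class AffineQuantizedTensor:
    """Innermost subclass: the only level that owns tensor_impl."""
    def __init__(self):
        self.tensor_impl = "packed int8 data"

class LinearActivationQuantizedTensor:
    """Middle wrapper: holds the quantized weight, no tensor_impl of its own."""
    def __init__(self, original_weight_tensor):
        self.original_weight_tensor = original_weight_tensor

class DTensor:
    """Outer wrapper standing in for the distributed tensor."""
    def __init__(self, local_tensor):
        self._local_tensor = local_tensor

nested = DTensor(LinearActivationQuantizedTensor(AffineQuantizedTensor()))

# The failing lookup: tensor_impl is read off the outer
# LinearActivationQuantizedTensor, where it does not exist.
try:
    nested._local_tensor.tensor_impl
except AttributeError as exc:
    print("AttributeError:", exc)

# The attribute actually lives one level deeper, on the inner
# AffineQuantizedTensor.
print(nested._local_tensor.original_weight_tensor.tensor_impl)
```

This is only a structural analogy: in the real failure the lookup is emitted by torch.compile's subclass-unwrapping codegen, but the shape of the bug is the same, an attribute that exists only on the innermost tensor being read from a middle wrapper.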
1 parent 8fca033 commit 136cacb

1 file changed: 0 additions and 13 deletions

File tree

test/dtypes/test_affine_quantized_tensor_parallel.py

```diff
@@ -18,7 +18,6 @@
     Float8DynamicActivationFloat8WeightConfig,
     Float8WeightOnlyConfig,
     Int4WeightOnlyConfig,
-    Int8DynamicActivationInt8WeightConfig,
     Int8WeightOnlyConfig,
     PerRow,
     PerTensor,
@@ -149,20 +148,8 @@ def test_tp(self, dtype):
         return self._test_tp(dtype)
 
 
-class TestInt8dqAffineQuantizedTensorParallel(TestAffineQuantizedTensorParallel):
-    QUANT_METHOD_FN = staticmethod(Int8DynamicActivationInt8WeightConfig)
-    COMMON_DTYPES = [torch.bfloat16]
-
-    @common_utils.parametrize("dtype", COMMON_DTYPES)
-    @with_comms
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    def test_tp(self, dtype):
-        return self._test_tp(dtype)
-
-
 common_utils.instantiate_parametrized_tests(TestInt8woAffineQuantizedTensorParallel)
 common_utils.instantiate_parametrized_tests(TestInt4woAffineQuantizedTensorParallel)
-common_utils.instantiate_parametrized_tests(TestInt8dqAffineQuantizedTensorParallel)
 
 # Float8 TP requires FP8-capable hardware (H100+ on CUDA, MI300+ on ROCm)
 from torchao.utils import is_MI300, is_MI350, is_sm_at_least_90
```
