Skip to content

Commit d26bbae

Browse files
rishisinha and DCCS-5881
authored
[moe training] fix fp8 grouped mm compile issue (#4233)
[moe training] register Float8TrainingOpConfig as pytree constant for torch.compile

Register Float8TrainingOpConfig as a pytree constant (matching MXFP8TrainingOpConfig) so torch.compile can properly handle the config stored in tensor subclass metadata via __tensor_flatten__.

- Add @register_as_pytree_constant decorator to Float8TrainingOpConfig
- Add __eq__ and __hash__ methods required for pytree constant registration

Related: #4048
Made-with: Cursor
Co-authored-by: DCCS-5881 <rissinha@chi-mi325x-pod2-103.ord.vultr.cpe.ice.amd.com>
1 parent 6e7a6e9 commit d26bbae

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

torchao/prototype/moe_training/config.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ class TrainingOpBaseConfig(AOBaseConfig):
4444
pass
4545

4646

47+
@register_as_pytree_constant
4748
@dataclass
4849
class Float8TrainingOpConfig(TrainingOpBaseConfig):
4950
"""
@@ -74,6 +75,25 @@ def from_recipe(
7475
else:
7576
raise ValueError(f"Unsupported FP8 recipe: {recipe}")
7677

78+
def __eq__(self, other):
79+
if isinstance(other, Float8TrainingOpConfig):
80+
return (
81+
self.float8_dtype == other.float8_dtype
82+
and self.out_dtype == other.out_dtype
83+
and self.pad_token_groups_for_grouped_mm
84+
== other.pad_token_groups_for_grouped_mm
85+
)
86+
return NotImplemented
87+
88+
def __hash__(self):
89+
return hash(
90+
(
91+
self.float8_dtype,
92+
self.out_dtype,
93+
self.pad_token_groups_for_grouped_mm,
94+
)
95+
)
96+
7797

7898
# register as pytree constant so we can use dynamo nonstrict trace in torchao.prototype.moe_training.ep
7999
@register_as_pytree_constant

0 commit comments

Comments (0)