Skip to content

Commit 9f359b0

Browse files
committed
Fix MetaTensor spatial_ndim propagation regressions
- Clamp spatial_ndim only for true batch-only indexing
- Handle explicit no-channel metadata when normalizing rank
- Remove SplitDim double-decrement after affine sync
- Align batch-slice tests with batched MetaTensor metadata
- Extract DEFAULT_SPATIAL_NDIM constant to eliminate magic numbers
- Add documentation explaining spatial_ndim caching and affine sync

Signed-off-by: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
1 parent 787eef4 commit 9f359b0

File tree

7 files changed

+75
-28
lines changed

7 files changed

+75
-28
lines changed

monai/data/meta_obj.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424

2525
_TRACK_META = True
2626

27+
# Default number of spatial dimensions for medical imaging (3D volumetric data)
28+
_DEFAULT_SPATIAL_NDIM = 3
29+
2730
__all__ = ["get_track_meta", "set_track_meta", "MetaObj"]
2831

2932

monai/data/meta_tensor.py

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import functools
1515
import warnings
16-
from collections.abc import Sequence
16+
from collections.abc import Mapping, Sequence
1717
from copy import deepcopy
1818
from typing import Any
1919

@@ -22,18 +22,27 @@
2222

2323
import monai
2424
from monai.config.type_definitions import NdarrayOrTensor, NdarrayTensor
25-
from monai.data.meta_obj import MetaObj, get_track_meta
26-
from monai.data.utils import affine_to_spacing, decollate_batch, list_data_collate, remove_extra_metadata
25+
from monai.data.meta_obj import _DEFAULT_SPATIAL_NDIM, MetaObj, get_track_meta
26+
from monai.data.utils import affine_to_spacing, decollate_batch, is_no_channel, list_data_collate, remove_extra_metadata
2727
from monai.utils import look_up_option
2828
from monai.utils.enums import LazyAttr, MetaKeys, PostFix, SpaceKeys
2929
from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
3030

3131
__all__ = ["MetaTensor", "get_spatial_ndim"]
3232

3333

34-
def _normalize_spatial_ndim(spatial_ndim: int, tensor_ndim: int) -> int:
34+
def _normalize_spatial_ndim(spatial_ndim: int, tensor_ndim: int, no_channel: bool = False) -> int:
3535
"""Clamp spatial dims to a valid range for the current tensor shape."""
36-
return max(1, min(int(spatial_ndim), max(int(tensor_ndim) - 1, 1)))
36+
limit = max(int(tensor_ndim), 1) if no_channel else max(int(tensor_ndim) - 1, 1)
37+
return max(1, min(int(spatial_ndim), limit))
38+
39+
40+
def _has_explicit_no_channel(meta: Mapping | None) -> bool:
41+
return (
42+
isinstance(meta, Mapping)
43+
and MetaKeys.ORIGINAL_CHANNEL_DIM in meta
44+
and is_no_channel(meta[MetaKeys.ORIGINAL_CHANNEL_DIM])
45+
)
3746

3847

3948
def get_spatial_ndim(img: NdarrayOrTensor) -> int:
@@ -43,16 +52,22 @@ def get_spatial_ndim(img: NdarrayOrTensor) -> int:
4352
``img.ndim - 1``.
4453
"""
4554
if isinstance(img, MetaTensor):
46-
inferred = _normalize_spatial_ndim(img.spatial_ndim, img.ndim)
47-
shape_spatial = max(img.ndim - 1, 1)
48-
# For non-batched tensors, preserve explicit higher-rank shape information
49-
# (e.g., invalid 4D spatial inputs should still be reported as rank 4).
50-
if not img.is_batch and shape_spatial > inferred:
51-
return shape_spatial
52-
return inferred
55+
no_channel = _has_explicit_no_channel(img.meta)
56+
return _normalize_spatial_ndim(img.spatial_ndim, img.ndim, no_channel=no_channel)
5357
return img.ndim - 1
5458

5559

60+
def _is_batch_only_index(index: Any) -> bool:
61+
"""True when indexing pattern selects only the batch axis (e.g., ``x[0]`` or ``x[0, ...]``)."""
62+
if isinstance(index, (int, np.integer)):
63+
return True
64+
if not isinstance(index, Sequence) or not index:
65+
return False
66+
if not isinstance(index[0], (int, np.integer)):
67+
return False
68+
return all(i in (slice(None, None, None), Ellipsis, None) for i in index[1:])
69+
70+
5671
@functools.lru_cache(None)
5772
def _get_named_tuple_like_type(func):
5873
if (
@@ -184,11 +199,13 @@ def __init__(
184199
self.affine = self.meta[MetaKeys.AFFINE]
185200
else:
186201
self.affine = self.get_default_affine()
187-
# derive spatial_ndim from affine, clamped by tensor shape
202+
# Initialize spatial_ndim from affine matrix (source of truth), clamped by tensor shape.
203+
# This cached value is kept in sync via the affine setter for hot-path performance.
204+
no_channel = _has_explicit_no_channel(self.meta)
188205
if spatial_ndim is not None:
189-
self.spatial_ndim = _normalize_spatial_ndim(spatial_ndim, self.ndim)
206+
self.spatial_ndim = _normalize_spatial_ndim(spatial_ndim, self.ndim, no_channel=no_channel)
190207
elif self.affine.ndim == 2:
191-
self.spatial_ndim = _normalize_spatial_ndim(self.affine.shape[-1] - 1, self.ndim)
208+
self.spatial_ndim = _normalize_spatial_ndim(self.affine.shape[-1] - 1, self.ndim, no_channel=no_channel)
192209

193210
# applied_operations
194211
if applied_operations is not None:
@@ -254,8 +271,6 @@ def update_meta(rets: Sequence, func, args, kwargs) -> Sequence:
254271
# raise NotImplementedError("torch.cat is not implemented for batch of MetaTensors.")
255272
if is_batch:
256273
ret = MetaTensor._handle_batched(ret, idx, metas, func, args, kwargs)
257-
if func == torch.Tensor.__getitem__:
258-
ret.spatial_ndim = _normalize_spatial_ndim(ret.spatial_ndim, ret.ndim)
259274
out.append(ret)
260275
# if the input was a tuple, then return it as a tuple
261276
return tuple(out) if isinstance(rets, tuple) else out
@@ -271,6 +286,7 @@ def _handle_batched(cls, ret, idx, metas, func, args, kwargs):
271286
if func == torch.Tensor.__getitem__:
272287
if idx > 0 or len(args) < 2 or len(args[0]) < 1:
273288
return ret
289+
full_idx = args[1]
274290
batch_idx = args[1][0] if isinstance(args[1], Sequence) else args[1]
275291
# if using e.g., `batch[:, -1]` or `batch[..., -1]`, then the
276292
# first element will be `slice(None, None, None)` and `Ellipsis`,
@@ -292,6 +308,8 @@ def _handle_batched(cls, ret, idx, metas, func, args, kwargs):
292308
ret_meta.is_batch = False
293309
if hasattr(ret_meta, "__dict__"):
294310
ret.__dict__ = ret_meta.__dict__.copy()
311+
if _is_batch_only_index(full_idx):
312+
ret.spatial_ndim = _normalize_spatial_ndim(ret.spatial_ndim, ret.ndim, no_channel=False)
295313
# `unbind` is used for `next(iter(batch))`. Also for `decollate_batch`.
296314
# But we only want to split the batch if the `unbind` is along the 0th dimension.
297315
elif func == torch.Tensor.unbind:
@@ -501,16 +519,26 @@ def affine(self) -> torch.Tensor:
501519

502520
@affine.setter
503521
def affine(self, d: NdarrayTensor) -> None:
504-
"""Set the affine."""
522+
"""Set the affine.
523+
524+
When setting a non-batched affine matrix, automatically synchronizes the cached
525+
spatial_ndim attribute to maintain consistency between the affine matrix (source of truth)
526+
and the cached spatial dimension count.
527+
"""
505528
a = torch.as_tensor(d, device=torch.device("cpu"), dtype=torch.float64)
506529
self.meta[MetaKeys.AFFINE] = a
507-
if a.ndim == 2: # non-batched: sync spatial_ndim
508-
self.spatial_ndim = _normalize_spatial_ndim(a.shape[-1] - 1, self.ndim)
530+
if a.ndim == 2: # non-batched: sync spatial_ndim from affine (source of truth)
531+
no_channel = _has_explicit_no_channel(self.meta)
532+
self.spatial_ndim = _normalize_spatial_ndim(a.shape[-1] - 1, self.ndim, no_channel=no_channel)
509533

510534
@property
511535
def spatial_ndim(self) -> int:
512-
"""Get the number of spatial dimensions."""
513-
return getattr(self, "_spatial_ndim", 3)
536+
"""Get the number of spatial dimensions.
537+
538+
This value is cached for hot-path performance and is kept in sync with the affine matrix
539+
via the affine setter. The affine matrix is the source of truth for spatial dimensions.
540+
"""
541+
return getattr(self, "_spatial_ndim", _DEFAULT_SPATIAL_NDIM)
514542

515543
@spatial_ndim.setter
516544
def spatial_ndim(self, val: int) -> None:

monai/data/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from torch.utils.data._utils.collate import default_collate
3232

3333
from monai.config.type_definitions import NdarrayOrTensor, NdarrayTensor, PathLike
34-
from monai.data.meta_obj import MetaObj
34+
from monai.data.meta_obj import _DEFAULT_SPATIAL_NDIM, MetaObj
3535
from monai.utils import (
3636
MAX_SEED,
3737
BlendMode,
@@ -432,7 +432,7 @@ def collate_meta_tensor_fn(batch, *, collate_fn_map=None):
432432
collated.meta = default_collate(meta_dicts)
433433
collated.applied_operations = [i.applied_operations or TraceKeys.NONE for i in batch]
434434
collated.is_batch = True
435-
collated.spatial_ndim = min(getattr(batch[0], "spatial_ndim", 3), max(collated.ndim - 1, 1))
435+
collated.spatial_ndim = min(getattr(batch[0], "spatial_ndim", _DEFAULT_SPATIAL_NDIM), max(collated.ndim - 1, 1))
436436
return collated
437437

438438

monai/transforms/post/array.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,10 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
625625
img = convert_to_tensor(img, track_meta=get_track_meta())
626626
img_: torch.Tensor = convert_to_tensor(img, track_meta=False)
627627
spatial_dims = get_spatial_ndim(img)
628+
# Validate actual tensor shape against tracked spatial_ndim
629+
actual_spatial = img_.ndim - 1 # channel-first layout
630+
if actual_spatial != spatial_dims:
631+
spatial_dims = actual_spatial
628632
img_ = img_.unsqueeze(0) # adds a batch dim
629633
if spatial_dims == 2:
630634
kernel = torch.tensor([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=torch.float32)

monai/transforms/spatial/array.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,9 @@ def inverse_transform(self, data: torch.Tensor, transform) -> torch.Tensor:
10361036
out = convert_to_dst_type(out, dst=data, dtype=out.dtype)[0]
10371037
if isinstance(out, MetaTensor):
10381038
affine = convert_to_tensor(out.peek_pending_affine(), track_meta=False)
1039+
# Use affine matrix shape directly (not spatial_ndim) because the affine may be
1040+
# larger than the spatial dimensions (e.g., 4x4 for 2D data), and we need to match
1041+
# the actual affine matrix rank being composed
10391042
mat = to_affine_nd(len(affine) - 1, transform_t)
10401043
out.affine @= convert_to_dst_type(mat, affine)[0]
10411044
return out
@@ -2352,6 +2355,8 @@ def inverse(self, data: torch.Tensor) -> torch.Tensor:
23522355
out = MetaTensor(out)
23532356
out.meta = data.meta # type: ignore
23542357
affine = convert_data_type(out.peek_pending_affine(), torch.Tensor)[0]
2358+
# Use affine matrix shape directly (not spatial_ndim) to ensure matrix composition compatibility
2359+
# when affine is larger than spatial dimensions (e.g., 4x4 for 2D data)
23552360
xform, *_ = convert_to_dst_type(
23562361
Affine.compute_w_affine(len(affine) - 1, inv_affine, data.shape[1:], orig_size), affine
23572362
)
@@ -2621,6 +2626,8 @@ def inverse(self, data: torch.Tensor) -> torch.Tensor:
26212626
out = MetaTensor(out)
26222627
out.meta = data.meta # type: ignore
26232628
affine = convert_data_type(out.peek_pending_affine(), torch.Tensor)[0]
2629+
# Use affine matrix shape directly (not spatial_ndim) to ensure matrix composition compatibility
2630+
# when affine is larger than spatial dimensions (e.g., 4x4 for 2D data)
26242631
xform, *_ = convert_to_dst_type(
26252632
Affine.compute_w_affine(len(affine) - 1, inv_affine, data.shape[1:], orig_size), affine
26262633
)
@@ -3035,10 +3042,11 @@ def __call__(
30353042
raise ValueError("the spatial size of `img` does not match with the length of `distort_steps`")
30363043

30373044
all_ranges = []
3038-
num_cells = ensure_tuple_rep(self.num_cells, get_spatial_ndim(img))
3045+
_sp = get_spatial_ndim(img)
3046+
num_cells = ensure_tuple_rep(self.num_cells, _sp)
30393047
if isinstance(img, MetaTensor) and img.pending_operations:
30403048
warnings.warn("MetaTensor img has pending operations, transform may return incorrect results.")
3041-
for dim_idx, dim_size in enumerate(img.shape[1:]):
3049+
for dim_idx, dim_size in enumerate(img.shape[1 : 1 + _sp]):
30423050
dim_distort_steps = distort_steps[dim_idx]
30433051
ranges = torch.zeros(dim_size, dtype=torch.float32)
30443052
cell_size = dim_size // num_cells[dim_idx]

monai/transforms/utility/array.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,6 @@ def __call__(self, img: torch.Tensor) -> list[torch.Tensor]:
334334
shift = torch.eye(ndim, device=out.affine.device, dtype=out.affine.dtype)
335335
shift[dim - 1, -1] = idx
336336
out.affine = out.affine @ shift
337-
if not self.keepdim:
338-
out.spatial_ndim = max(1, out.spatial_ndim - 1)
339337
return outputs
340338

341339

tests/data/meta_tensor/test_spatial_ndim.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ def test_lazy_apply_pending_2d(self):
123123

124124
def test_batch_slice_clamps_spatial_ndim(self):
125125
t = MetaTensor(torch.randn(10, 6, 5, 7), affine=torch.eye(4))
126+
t.is_batch = True
127+
t.meta["affine"] = torch.eye(4)[None].repeat(10, 1, 1)
126128
self.assertEqual(t.spatial_ndim, 3)
127129
sliced = t[0]
128130
self.assertEqual(sliced.shape, (6, 5, 7))
@@ -131,12 +133,16 @@ def test_batch_slice_clamps_spatial_ndim(self):
131133

132134
def test_label_to_contour_batch_slice_2d(self):
133135
t = MetaTensor(torch.randint(0, 2, (10, 6, 5, 7)).float(), affine=torch.eye(4))
136+
t.is_batch = True
137+
t.meta["affine"] = torch.eye(4)[None].repeat(10, 1, 1)
134138
sliced = t[0]
135139
out = LabelToContour()(sliced)
136140
self.assertEqual(out.shape, sliced.shape)
137141

138142
def test_rand_zoom_batch_slice_2d(self):
139143
t = MetaTensor(torch.randn(10, 1, 64, 64), affine=torch.eye(4))
144+
t.is_batch = True
145+
t.meta["affine"] = torch.eye(4)[None].repeat(10, 1, 1)
140146
sliced = t[0]
141147
zoom = RandZoom(prob=1.0, min_zoom=0.6, max_zoom=1.2)
142148
zoom.set_random_state(seed=0)

0 commit comments

Comments (0)