-
Notifications
You must be signed in to change notification settings - Fork 50
Respect STAC cube:dimensions as source of truth in metadata_from_stac + align tests with dimension policy #867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 7 commits
ccd7f23
b49026b
b9abf3b
d83208c
f77982c
ad706de
833d1e7
64a1423
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException): | |
|
|
||
|
|
||
| # TODO: make these dimension classes immutable data classes | ||
| # TODO: align better with STAC datacube extension | ||
| # TODO: align/adapt/integrate with pystac's datacube extension implementation? | ||
| class Dimension: | ||
| """Base class for dimensions.""" | ||
|
|
||
|
|
@@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension: | |
|
|
||
| class SpatialDimension(Dimension): | ||
| # TODO: align better with STAC datacube extension: e.g. support "axis" (x or y) | ||
|
|
||
| DEFAULT_CRS = 4326 | ||
|
|
||
| def __init__( | ||
|
|
@@ -679,30 +676,16 @@ def metadata_from_stac(url: str) -> CubeMetadata: | |
| """ | ||
| Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` | ||
|
|
||
| Policy: | ||
| - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). | ||
| - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. | ||
|
|
||
| :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection | ||
| :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. | ||
| """ | ||
| stac_object = pystac.read_file(href=url) | ||
| bands = _StacMetadataParser().bands_from_stac_object(stac_object) | ||
|
|
||
| # At least assume there are spatial dimensions | ||
| # TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions? | ||
| dimensions = [ | ||
| SpatialDimension(name="x", extent=[None, None]), | ||
| SpatialDimension(name="y", extent=[None, None]), | ||
| ] | ||
|
|
||
| # TODO #743: conditionally include band dimension when there was actual indication of band metadata? | ||
| band_dimension = BandDimension(name="bands", bands=bands) | ||
| dimensions.append(band_dimension) | ||
|
|
||
| # TODO: is it possible to derive the actual name of temporal dimension that the backend will use? | ||
| temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object) | ||
| if temporal_dimension: | ||
| dimensions.append(temporal_dimension) | ||
|
|
||
| metadata = CubeMetadata(dimensions=dimensions) | ||
| return metadata | ||
| parser = _StacMetadataParser() | ||
| return parser.metadata_from_stac_object(stac_object) | ||
|
|
||
| # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) | ||
| # TODO: remove this once support for Python 3.7 and 3.8 is dropped | ||
|
|
@@ -760,39 +743,172 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa | |
| # Use caching trick to avoid duplicate warnings | ||
| self._warn = functools.lru_cache(maxsize=1000)(self._warn) | ||
|
|
||
| def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata: | ||
| """ | ||
| Build cube metadata from a STAC object. | ||
| """ | ||
| bands = self.bands_from_stac_object(stac_object) | ||
| dimensions = self.dimensions_from_stac_object(stac_object=stac_object, bands=bands) | ||
| return CubeMetadata(dimensions=dimensions) | ||
|
|
||
| def dimensions_from_stac_object(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why having |
||
| """ | ||
| Build dimension metadata from a STAC object. | ||
|
|
||
| Philosophy: | ||
| - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). | ||
| - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. | ||
| """ | ||
| if self.has_cube_dimensions(stac_object): | ||
| dimensions = self.parse_declared_dimensions(stac_object=stac_object, bands=bands) | ||
| if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance( | ||
| stac_object, (pystac.Collection, pystac.Item) | ||
| ): | ||
| dimensions.append(BandDimension(name="bands", bands=list(bands))) | ||
| return dimensions | ||
|
|
||
| dimensions: List[Dimension] = [ | ||
| SpatialDimension(name="x", extent=[None, None]), | ||
| SpatialDimension(name="y", extent=[None, None]), | ||
| TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_object)), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reuse |
||
| ] | ||
| if isinstance(stac_object, (pystac.Collection, pystac.Item)): | ||
| dimensions.append(BandDimension(name="bands", bands=list(bands))) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why conditionally add the band dimension here? |
||
| return dimensions | ||
|
|
||
| def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]: | ||
| """ | ||
| Extract the temporal dimension from a STAC Collection/Item (if any) | ||
| """ | ||
| # TODO: also extract temporal dimension from assets? | ||
| if _PYSTAC_1_9_EXTENSION_INTERFACE: | ||
| if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): | ||
| temporal_dims = [ | ||
| (n, d.extent or [None, None]) | ||
| for (n, d) in stac_obj.ext.cube.dimensions.items() | ||
| if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL | ||
| if self.has_cube_dimensions(stac_obj): | ||
| temporal_dimensions = [ | ||
| d | ||
| for d in self.parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([])) | ||
| if isinstance(d, TemporalDimension) | ||
| ] | ||
| if len(temporal_dimensions) == 1: | ||
| return temporal_dimensions[0] | ||
|
|
||
| if isinstance(stac_obj, (pystac.Collection, pystac.Item)): | ||
| return TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_obj)) | ||
|
|
||
| def has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool: | ||
| cube_dimensions = self.cube_dimensions_dict(stac_object) | ||
| return isinstance(cube_dimensions, dict) and len(cube_dimensions) > 0 | ||
|
|
||
| def cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this has to be in the public interface |
||
| """ | ||
| Return raw cube:dimensions dict from a Collection/Item, or {}. | ||
| """ | ||
| if isinstance(stac_object, pystac.Item): | ||
| return stac_object.properties.get("cube:dimensions", {}) or {} | ||
| if isinstance(stac_object, pystac.Collection): | ||
| return stac_object.extra_fields.get("cube:dimensions", {}) or {} | ||
| return {} | ||
|
|
||
| def infer_temporal_extent(self, stac_object: pystac.STACObject) -> List[Optional[str]]: | ||
| """ | ||
| Best-effort temporal extent: | ||
| - Collection: extent.temporal interval | ||
| - Item: datetime or start/end | ||
| """ | ||
| if isinstance(stac_object, pystac.Collection) and stac_object.extent and stac_object.extent.temporal: | ||
| interval = stac_object.extent.temporal.intervals[0] | ||
| return [Rfc3339(propagate_none=True).normalize(d) for d in interval] | ||
|
|
||
| if isinstance(stac_object, pystac.Item): | ||
| props = getattr(stac_object, "properties", {}) or {} | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just wondering: why using |
||
| dt_ = props.get("datetime") | ||
| if dt_: | ||
| norm = Rfc3339(propagate_none=True).normalize(dt_) | ||
| return [norm, norm] | ||
| start = props.get("start_datetime") | ||
| end = props.get("end_datetime") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't the "datetime" field a required field, so is this code even reachable? |
||
| if start or end: | ||
| return [ | ||
| Rfc3339(propagate_none=True).normalize(start), | ||
| Rfc3339(propagate_none=True).normalize(end), | ||
| ] | ||
| if len(temporal_dims) == 1: | ||
| name, extent = temporal_dims[0] | ||
| return TemporalDimension(name=name, extent=extent) | ||
| elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: | ||
| # No explicit "cube:dimensions": build fallback from "extent.temporal", | ||
| # with dimension name "t" (openEO API recommendation). | ||
| extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] | ||
| return TemporalDimension(name="t", extent=extent) | ||
| else: | ||
| if isinstance(stac_obj, pystac.Item): | ||
| cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) | ||
| elif isinstance(stac_obj, pystac.Collection): | ||
| cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) | ||
|
|
||
| return [None, None] | ||
|
|
||
| @staticmethod | ||
| def _safe_extent_from_pystac_cube_dim(dim) -> list: | ||
| """ | ||
| PySTAC cube dimension wrapper may raise if 'extent' is missing. | ||
| Also, depending on serialization/version, extent might live in extra_fields. | ||
| """ | ||
| try: | ||
| ext = dim.extent | ||
| except Exception: | ||
| ext = None | ||
|
|
||
| if not ext: | ||
| extra = getattr(dim, "extra_fields", {}) or {} | ||
| ext = extra.get("extent") | ||
|
|
||
| return ext or [None, None] | ||
|
|
||
| def parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as noted above, it looks weird to have this required |
||
| """ | ||
| Parse dimensions declared through cube:dimensions. | ||
| """ | ||
| if ( | ||
| _PYSTAC_1_9_EXTENSION_INTERFACE | ||
| and getattr(stac_object, "ext", None) is not None | ||
| and stac_object.ext.has("cube") | ||
| and hasattr(stac_object.ext, "cube") | ||
| ): | ||
| return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands) | ||
| return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands) | ||
|
|
||
| def _parse_cube_dimensions_from_pystac_extension( | ||
| self, stac_object: pystac.STACObject, bands: _BandList | ||
| ) -> List[Dimension]: | ||
| """ | ||
| Parse dimensions from PySTAC's cube extension wrapper (when present). | ||
| Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. | ||
| Everything else is treated as band-like. | ||
| """ | ||
| dimensions = [] | ||
| for name, dim in stac_object.ext.cube.dimensions.items(): | ||
| dim_type = getattr(dim, "dim_type", None) | ||
| extent = self._safe_extent_from_pystac_cube_dim(dim) | ||
|
|
||
| if dim_type == pystac.extensions.datacube.DimensionType.SPATIAL: | ||
| dimensions.append(SpatialDimension(name=name, extent=extent)) | ||
| elif dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL: | ||
| dimensions.append(TemporalDimension(name=name, extent=extent)) | ||
| else: | ||
| cube_dimensions = {} | ||
| temporal_dims = [ | ||
| (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" | ||
| ] | ||
| if len(temporal_dims) == 1: | ||
| name, extent = temporal_dims[0] | ||
| return TemporalDimension(name=name, extent=extent) | ||
| dimensions.append(BandDimension(name=name, bands=list(bands))) | ||
|
|
||
| return dimensions | ||
|
|
||
| def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: | ||
| """ | ||
| Parse dimensions from raw cube:dimensions dict. | ||
| Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). | ||
| """ | ||
| dimensions = [] | ||
| cube_dimensions = self.cube_dimensions_dict(stac_object) | ||
|
|
||
| for name, dim in cube_dimensions.items(): | ||
| if not isinstance(dim, dict): | ||
| continue | ||
|
|
||
| dim_type = dim.get("type") | ||
| extent = dim.get("extent", [None, None]) | ||
|
|
||
| if dim_type == "spatial": | ||
| dimensions.append(SpatialDimension(name=name, extent=extent)) | ||
| elif dim_type == "temporal": | ||
| dimensions.append(TemporalDimension(name=name, extent=extent)) | ||
| elif dim_type in ("bands", "spectral"): | ||
| dimensions.append(BandDimension(name=name, bands=list(bands))) | ||
| else: | ||
| dimensions.append(Dimension(name=name, type=dim_type)) | ||
|
|
||
| return dimensions | ||
|
|
||
| def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band: | ||
| """Construct band from metadata in eo v1.1 style""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1156,7 +1156,10 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected): | |
| # TODO #738 real request mocking of STAC resources compatible with pystac? | ||
| path.write_text(json.dumps(test_stac)) | ||
| metadata = metadata_from_stac(str(path)) | ||
| assert metadata.band_names == expected | ||
| if expected: | ||
| assert metadata.band_names == expected | ||
| else: | ||
| assert not metadata.has_band_dimension() | ||
|
|
||
|
|
||
|
|
||
|
|
@@ -1253,7 +1256,73 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): | |
| assert isinstance(dim, TemporalDimension) | ||
| assert (dim.name, dim.extent) == expected | ||
| else: | ||
| assert not metadata.has_temporal_dimension() | ||
| # With openEO defaults, a temporal dimension name ('t') can exist even when extent is unknown. | ||
|
suriyahgit marked this conversation as resolved.
|
||
| # Depending on STAC input, pystac/datacube parsing can produce: | ||
| # - a degenerate extent [d, d] when a single `datetime` is present | ||
| # - an "unknown" extent [None, None] when there is no temporal info | ||
| assert metadata.has_temporal_dimension() | ||
| extent = metadata.temporal_dimension.extent | ||
| if extent is None: | ||
| pass | ||
| else: | ||
| assert isinstance(extent, list) and len(extent) == 2 | ||
| # Allow "unknown" temporal extent representation | ||
| if extent == [None, None]: | ||
| pass | ||
| else: | ||
| # Allow degenerate interval when a single datetime is available | ||
| assert extent[0] == extent[1] | ||
|
|
||
|
|
||
|
|
||
| # Dimension name resolution policy (STAC cube:dimensions vs openEO defaults) | ||
|
suriyahgit marked this conversation as resolved.
|
||
| @pytest.mark.parametrize( | ||
| ["stac_dict", "expected_dims"], | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can a STAC 1.1 style test case be added which uses the new 'bands' common metadata and no datacube extension? |
||
| [ | ||
| ( | ||
| # No cube:dimensions -> fall back to openEO default naming convention | ||
| StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}), | ||
| {"t", "bands", "y", "x"}, | ||
| ), | ||
| ( | ||
| # No cube:dimensions (item) -> fall back to openEO default naming convention | ||
| StacDummyBuilder.item( | ||
| properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]} | ||
| ), | ||
| {"t", "bands", "y", "x"}, | ||
| ), | ||
| ( | ||
| # cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys | ||
| StacDummyBuilder.collection( | ||
| cube_dimensions={ | ||
| "time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]}, | ||
| "band": {"type": "bands", "axis": "bands", "values": ["B01"]}, | ||
| "y": {"type": "spatial", "axis": "y", "extent": [0, 1]}, | ||
| "x": {"type": "spatial", "axis": "x", "extent": [0, 1]}, | ||
| } | ||
| ), | ||
| {"time", "band", "y", "x"}, | ||
| ), | ||
| ], | ||
| ) | ||
| def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims): | ||
| path = tmp_path / "stac.json" | ||
| # TODO #738 real request mocking of STAC resources compatible with pystac? | ||
| path.write_text(json.dumps(stac_dict)) | ||
| metadata = metadata_from_stac(str(path)) | ||
|
|
||
| got = tuple(metadata.dimension_names() or ()) | ||
|
|
||
| # Order-insensitive check: names only | ||
| assert set(got) == expected_dims | ||
|
|
||
| # Ensure the policy logic is exercised correctly: | ||
| # cube:dimensions can be located at root (collection) or in properties (item) | ||
| cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") | ||
| if cube_dims is None: | ||
| assert set(got) == {"t", "bands", "y", "x"} | ||
| else: | ||
| assert set(got) == set(cube_dims.keys()) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.