diff --git a/docs/reference/sql/_render_meta.py b/docs/reference/sql/_render_meta.py index 949273da6..bbf576098 100644 --- a/docs/reference/sql/_render_meta.py +++ b/docs/reference/sql/_render_meta.py @@ -187,6 +187,8 @@ def to_str(v): return " " elif v["t"] == "Para": return "".join(to_str(item) for item in v["c"]) + elif v["t"] == "RawInline": + return v["c"][1] if v["t"] == "Quoted": quote_type = v["c"][0]["t"] if quote_type == "SingleQuote": diff --git a/docs/reference/sql/rs_bandtodim.qmd b/docs/reference/sql/rs_bandtodim.qmd new file mode 100644 index 000000000..a4f3d3485 --- /dev/null +++ b/docs/reference/sql/rs_bandtodim.qmd @@ -0,0 +1,64 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_BandToDim +description: Collapses all bands into a single band with a new dimension. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name for the new dimension (e.g., 'time', 'band'). +--- + +## Description + +Collapses all bands in a raster into a single band by introducing a new +dimension. The new dimension is prepended to the existing dimensions, with +size equal to the number of bands. Band data is concatenated in band order. + +All bands must have identical dimension names, shapes, data types, and +nodata values. If any of these differ, an error is returned — collapsing +bands with different nodata sentinels would silently lose information +about which sentinel applies where. + +The new dimension name must not already exist on the input bands. For +example, `RS_BandToDim(raster_with_y_x, 'y')` is rejected because the +output would carry duplicate `y` dimensions and the round-trip through +[RS_DimToBand](rs_dimtoband.qmd) cannot recover the original layout. + +This is the inverse of [RS_DimToBand](rs_dimtoband.qmd). A round-trip +`RS_BandToDim(RS_DimToBand(raster, 'dim'), 'dim')` recovers the original +data layout. + +This is useful for converting GeoTIFF-style multi-band rasters into a single +N-dimensional chunk for export to formats like Zarr. + +## Examples + +``` +-- Collapse 3 RGB bands [y, x] into 1 band [band=3, y, x] +SELECT RS_BandToDim(raster, 'band'); + +-- Collapse time-step bands back into a time dimension +SELECT RS_BandToDim(raster, 'time'); + +-- Round-trip: split then recombine +SELECT RS_BandToDim(RS_DimToBand(raster, 'time'), 'time'); +``` diff --git a/docs/reference/sql/rs_dimnames.qmd b/docs/reference/sql/rs_dimnames.qmd new file mode 100644 index 000000000..cd65531a9 --- /dev/null +++ b/docs/reference/sql/rs_dimnames.qmd @@ -0,0 +1,49 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimNames +description: Returns the ordered list of dimension names for a raster band. +kernels: + - returns: list + args: [raster] + - returns: list + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the ordered list of dimension names for a raster band. Standard 2D +rasters have dimensions `["y", "x"]`. N-dimensional rasters may include +additional dimensions such as `time`, `pressure`, `wavelength`, etc. + +The dimension names correspond to the entries in +[RS_Shape](rs_shape.qmd) — the i-th name matches the i-th size. + +When the band index is omitted, all bands must agree on their dimension +names; if they disagree, an error is returned prompting the user to specify +a band index. + +## Examples + +```sql +SELECT RS_DimNames(RS_Example()); +``` diff --git a/docs/reference/sql/rs_dimsize.qmd b/docs/reference/sql/rs_dimsize.qmd new file mode 100644 index 000000000..4ec359c21 --- /dev/null +++ b/docs/reference/sql/rs_dimsize.qmd @@ -0,0 +1,73 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimSize +description: Returns the size of a named dimension in a raster band. +kernels: + - returns: int64 + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to query (e.g., 'x', 'y', 'time'). + - returns: int64 + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to query. + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the size of a named dimension in a raster. For example, +`RS_DimSize(raster, 'x')` returns the width and `RS_DimSize(raster, 'time')` +returns the number of time steps. + +This is equivalent to looking up a specific entry in +[RS_Shape](rs_shape.qmd) by name rather than by position. + +### Heterogeneous rasters + +Bands within a raster can have different dimensionality. When the band index +is omitted, `RS_DimSize` considers only the bands that carry the named +dimension ("pass-through"): + +- If at least one band carries the dimension and they all agree on its + size, that size is returned. +- If the bands that carry the dimension disagree on its size, an error is + returned prompting the user to specify a band index. +- If **no** band carries the dimension, an error is returned. This usually + signals a typo'd dimension name. + +When the band index is supplied explicitly, returns the size of the +dimension in that band, or `NULL` if the band does not carry it. + +## Examples + +```sql +-- RS_Example() produces a 2-D raster, so 'x' and 'y' are the only dims. +SELECT RS_DimSize(RS_Example(), 'x'); +``` + +```sql +SELECT RS_DimSize(RS_Example(), 'y'); +``` diff --git a/docs/reference/sql/rs_dimtoband.qmd b/docs/reference/sql/rs_dimtoband.qmd new file mode 100644 index 000000000..13bf509d8 --- /dev/null +++ b/docs/reference/sql/rs_dimtoband.qmd @@ -0,0 +1,63 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimToBand +description: Promotes a within-band dimension into separate bands. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to promote (e.g., 'wavelength', 'time'). +--- + +## Description + +Promotes a named non-spatial dimension into separate bands. Each index along +the dimension becomes its own band with that dimension removed. This bridges +the gap between the dimension model (where all indices live in one band) and +the band model (where each index is a separate band accessible by number). + +For example, a raster with 1 band of shape `[wavelength=200, y=256, x=256]` +becomes a raster with 200 bands, each of shape `[y=256, x=256]`. Standard +band math functions can then operate on individual wavelength bands by +index. + +Bands that do not contain the named dimension are passed through unchanged. +If **no** band carries the named dimension, the function errors — that +condition usually signals a typo'd dimension name. + +The spatial dimensions (`x_dim` and `y_dim`) cannot be promoted. + +The inverse operation is [RS_BandToDim](rs_bandtodim.qmd). + +## Examples + +``` +-- Promote wavelength into separate bands for band math +SELECT RS_DimToBand(raster, 'wavelength'); + +-- Compute NDVI from specific wavelength bands +SELECT RS_NormalizedDifference( + RS_DimToBand(raster, 'wavelength'), 77, 54 +); + +-- Promote time steps into bands +SELECT RS_DimToBand(raster, 'time'); +``` diff --git a/docs/reference/sql/rs_numdimensions.qmd b/docs/reference/sql/rs_numdimensions.qmd new file mode 100644 index 000000000..ec54b9088 --- /dev/null +++ b/docs/reference/sql/rs_numdimensions.qmd @@ -0,0 +1,47 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_NumDimensions +description: Returns the number of dimensions in a raster band. +kernels: + - returns: int32 + args: [raster] + - returns: int32 + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the number of dimensions in a raster band. A standard 2D raster has 2 +dimensions (`y` and `x`). N-dimensional rasters (e.g., from Zarr or NetCDF +sources) may have additional dimensions such as `time`, `pressure`, or +`wavelength`. + +When the band index is omitted, all bands must agree on the number of +dimensions; if they disagree, an error is returned prompting the user to +specify a band index. + +## Examples + +```sql +SELECT RS_NumDimensions(RS_Example()); +``` diff --git a/docs/reference/sql/rs_shape.qmd b/docs/reference/sql/rs_shape.qmd new file mode 100644 index 000000000..6405cd2e6 --- /dev/null +++ b/docs/reference/sql/rs_shape.qmd @@ -0,0 +1,50 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_Shape +description: Returns the shape (size of each dimension) of a raster band. +kernels: + - returns: list + args: [raster] + - returns: list + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the shape of a raster band as a list of dimension sizes. The entries +correspond to the dimension names returned by +[RS_DimNames](rs_dimnames.qmd). + +For a standard 2D raster this returns `[height, width]`. For an N-dimensional +raster with a time dimension it might return `[12, 256, 256]` meaning 12 time +steps at 256x256 spatial resolution. + +When the band index is omitted, all bands must agree on their shape; if +they disagree, an error is returned prompting the user to specify a band +index. + +## Examples + +```sql +SELECT RS_Shape(RS_Example()); +``` diff --git a/docs/reference/sql/rs_slice.qmd b/docs/reference/sql/rs_slice.qmd new file mode 100644 index 000000000..0b545c2d5 --- /dev/null +++ b/docs/reference/sql/rs_slice.qmd @@ -0,0 +1,71 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_Slice +description: Selects a single index along a dimension, removing that dimension from the output. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to slice (e.g., 'time'). + - name: index + type: int + description: Zero-based index along the dimension. +--- + +## Description + +Extracts a single slice along a named non-spatial dimension, removing that +dimension from every band that carries it in the output raster. For example, +slicing a raster with shape `[time=12, y=256, x=256]` on `'time'` at index 5 +produces a 2D raster with shape `[y=256, x=256]` containing the data at time +step 5. + +The spatial dimensions (`x_dim` and `y_dim`) cannot be sliced. + +### Heterogeneous rasters + +Bands within a raster can have different dimensionality. Bands that do not +carry the named dimension are emitted unchanged ("pass-through") rather than +producing an error. This matches the convention used by `RS_DimToBand` and +`xarray`'s `ds.isel({dim: i})`. + +For example, with a raster containing an elevation band (`[y, x]`) and a +temperature band (`[time, y, x]`), `RS_Slice(raster, 'time', 5)` slices the +temperature band and passes the elevation band through unchanged. + +If **no** band carries the named dimension, the function errors — that +condition usually signals a typo'd dimension name. + +Returns an error if the index is out of range for any band that does carry +the dimension. + +See also [RS_SliceRange](rs_slicerange.qmd) to extract a range of indices +while keeping the dimension. + +## Examples + +``` +-- Extract time step 5 from a [time=12, y, x] raster +SELECT RS_Slice(raster, 'time', 5); + +-- Extract the first pressure level +SELECT RS_Slice(raster, 'pressure', 0); +``` diff --git a/docs/reference/sql/rs_slicerange.qmd b/docs/reference/sql/rs_slicerange.qmd new file mode 100644 index 000000000..2cfaeaf66 --- /dev/null +++ b/docs/reference/sql/rs_slicerange.qmd @@ -0,0 +1,70 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_SliceRange +description: Narrows a dimension to a half-open range, keeping the dimension with reduced size. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to narrow (e.g., 'time'). + - name: start + type: int + description: Start index (inclusive, zero-based). + - name: end + type: int + description: End index (exclusive). +--- + +## Description + +Narrows a named non-spatial dimension to the half-open range `[start, end)` +on every band that carries it, keeping the dimension in the output with +reduced size. For example, narrowing a raster with shape +`[time=12, y=256, x=256]` on `'time'` with range `[2, 7)` produces a raster +with shape `[time=5, y=256, x=256]`. + +The spatial dimensions (`x_dim` and `y_dim`) cannot be narrowed. + +### Heterogeneous rasters + +Bands within a raster can have different dimensionality. Bands that do not +carry the named dimension are emitted unchanged ("pass-through") rather than +producing an error. This matches the convention used by `RS_Slice`, +`RS_DimToBand`, and `xarray`'s `ds.isel`. + +If **no** band carries the named dimension, the function errors — that +condition usually signals a typo'd dimension name. + +Returns an error if `start >= end` or the range is out of bounds for any +band that does carry the dimension. + +See also [RS_Slice](rs_slice.qmd) to extract a single index and remove the +dimension entirely. + +## Examples + +``` +-- Extract time steps 0 through 4 from a [time=12, y, x] raster +SELECT RS_SliceRange(raster, 'time', 0, 5); + +-- Extract a range of pressure levels +SELECT RS_SliceRange(raster, 'pressure', 2, 8); +``` diff --git a/rust/sedona-raster-functions/src/lib.rs b/rust/sedona-raster-functions/src/lib.rs index dbc3749ed..2637347b4 100644 --- a/rust/sedona-raster-functions/src/lib.rs +++ b/rust/sedona-raster-functions/src/lib.rs @@ -22,6 +22,8 @@ pub mod register; pub mod rs_band_accessors; pub mod rs_bandpath; pub mod rs_convexhull; +pub mod rs_dim_band; +pub mod rs_dimensions; pub mod rs_envelope; pub mod rs_example; pub mod rs_georeference; @@ -31,6 +33,7 @@ pub mod rs_pixel_functions; pub mod rs_rastercoordinate; pub mod rs_setsrid; pub mod rs_size; +pub mod rs_slice; pub mod rs_spatial_predicates; pub mod rs_srid; pub mod rs_worldcoordinate; diff --git a/rust/sedona-raster-functions/src/register.rs b/rust/sedona-raster-functions/src/register.rs index e86fe8e98..c0dac9276 100644 --- a/rust/sedona-raster-functions/src/register.rs +++ b/rust/sedona-raster-functions/src/register.rs @@ -42,6 +42,12 @@ pub fn default_function_set() -> FunctionSet { crate::rs_band_accessors::rs_bandnodatavalue_udf, crate::rs_bandpath::rs_bandpath_udf, crate::rs_convexhull::rs_convexhull_udf, + crate::rs_dim_band::rs_dimtoband_udf, + crate::rs_dim_band::rs_bandtodim_udf, + crate::rs_dimensions::rs_numdimensions_udf, + crate::rs_dimensions::rs_dimnames_udf, + crate::rs_dimensions::rs_dimsize_udf, + crate::rs_dimensions::rs_shape_udf, crate::rs_envelope::rs_envelope_udf, crate::rs_example::rs_example_udf, crate::rs_georeference::rs_georeference_udf, @@ -59,15 +65,17 @@ pub fn default_function_set() -> FunctionSet { crate::rs_rastercoordinate::rs_worldtorastercoord_udf, crate::rs_rastercoordinate::rs_worldtorastercoordx_udf, crate::rs_rastercoordinate::rs_worldtorastercoordy_udf, - crate::rs_size::rs_height_udf, - crate::rs_size::rs_width_udf, crate::rs_setsrid::rs_set_crs_udf, crate::rs_setsrid::rs_set_srid_udf, - crate::rs_srid::rs_crs_udf, - crate::rs_srid::rs_srid_udf, + crate::rs_size::rs_height_udf, + crate::rs_size::rs_width_udf, + crate::rs_slice::rs_slice_udf, + crate::rs_slice::rs_slicerange_udf, crate::rs_spatial_predicates::rs_contains_udf, crate::rs_spatial_predicates::rs_intersects_udf, crate::rs_spatial_predicates::rs_within_udf, + crate::rs_srid::rs_crs_udf, + crate::rs_srid::rs_srid_udf, crate::rs_worldcoordinate::rs_rastertoworldcoord_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordx_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordy_udf, diff --git a/rust/sedona-raster-functions/src/rs_dim_band.rs b/rust/sedona-raster-functions/src/rs_dim_band.rs new file mode 100644 index 000000000..c5a8cc27e --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_dim_band.rs @@ -0,0 +1,768 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::error::Result; +use datafusion_common::{arrow_datafusion_err, exec_err}; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_raster::traits::RasterRef; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; +use crate::rs_slice::{extract_slice, require_any_band_has_dim}; + +// =========================================================================== +// RS_DimToBand +// =========================================================================== + +/// RS_DimToBand(raster, dim_name) -> Raster +/// +/// Expands each band that has the named dimension into multiple bands +/// (one per index along that dimension), removing that dimension from each. +/// Bands that do not have the named dimension are passed through unchanged. +/// Spatial dimensions cannot be expanded. +pub fn rs_dimtoband_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimtoband", + vec![Arc::new(RsDimToBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimToBand {} + +impl SedonaScalarKernel for RsDimToBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name)) => { + if name == raster.x_dim() || name == raster.y_dim() { + return exec_err!("RS_DimToBand: cannot expand spatial dimension '{name}'"); + } + + require_any_band_has_dim(raster, name, "RS_DimToBand")?; + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + let spatial_dims = raster.spatial_dims(); + new_builder.start_raster_nd( + &t, + &spatial_dims, + raster.spatial_shape(), + raster.crs(), + )?; + + for band_idx in 0..raster.num_bands() { + let band = raster + .band(band_idx) + .map_err(|e| arrow_datafusion_err!(e))?; + + let maybe_dim_idx = band.dim_index(name); + match maybe_dim_idx { + None => { + // Band doesn't have this dimension -- pass through + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let band_name = raster.band_name(band_idx); + new_builder.start_band_nd( + band_name, + &dim_name_refs, + band.shape(), + band.data_type(), + band.nodata(), + None, + None, + )?; + let data = band.contiguous_data()?; + new_builder.band_data_writer().append_value(&data); + new_builder.finish_band()?; + } + Some(dim_idx) => { + let dim_size = band.shape()[dim_idx]; + let new_dim_names: Vec<&str> = band + .dim_names() + .into_iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, n)| n) + .collect(); + let new_shape: Vec = band + .shape() + .iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, &s)| s) + .collect(); + + let orig_band_name = raster.band_name(band_idx); + + for idx in 0..dim_size { + let sliced_data = + extract_slice(band.as_ref(), dim_idx, idx, 1)?; + + let new_band_name = + orig_band_name.map(|n| format!("{n}_{name}_{idx}")); + let new_dim_name_refs: Vec<&str> = new_dim_names.to_vec(); + new_builder.start_band_nd( + new_band_name.as_deref(), + &new_dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + } + } + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// RS_BandToDim +// =========================================================================== + +/// RS_BandToDim(raster, dim_name) -> Raster +/// +/// Merges all bands into a single band by prepending a new dimension with +/// the given name. All bands must have identical dim_names, shape, and +/// data_type. The data from each band is concatenated in order. +pub fn rs_bandtodim_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_bandtodim", + vec![Arc::new(RsBandToDim {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsBandToDim {} + +impl SedonaScalarKernel for RsBandToDim { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name)) => { + let num_bands = raster.num_bands(); + if num_bands == 0 { + return exec_err!("RS_BandToDim: raster has no bands"); + } + + // Get reference band properties from band 0 + let band0 = raster.band(0).map_err(|e| arrow_datafusion_err!(e))?; + let ref_dim_names = band0.dim_names(); + let ref_shape = band0.shape().to_vec(); + let ref_data_type = band0.data_type(); + let ref_nodata: Option> = band0.nodata().map(|n| n.to_vec()); + + // The new dim name must not collide with an existing dim + // on the input bands; otherwise the output's dim_names + // would have duplicates and the round-trip through + // RS_DimToBand can't recover the original raster. + if ref_dim_names.contains(&name) { + return exec_err!( + "RS_BandToDim: dimension '{name}' already exists on the input bands; \ + pick a different name" + ); + } + + // Validate all bands match + for i in 1..num_bands { + let band = raster.band(i).map_err(|e| arrow_datafusion_err!(e))?; + if band.dim_names() != ref_dim_names { + return exec_err!( + "RS_BandToDim: band {i} has different dim_names than band 0" + ); + } + if band.shape() != ref_shape.as_slice() { + return exec_err!( + "RS_BandToDim: band {i} has different shape than band 0" + ); + } + if band.data_type() != ref_data_type { + return exec_err!( + "RS_BandToDim: band {i} has different data_type than band 0" + ); + } + // nodata must agree too — two bands with different + // sentinel values can't collapse into one output band + // without losing information about which sentinel + // applies where. + let band_nodata: Option> = band.nodata().map(|n| n.to_vec()); + if band_nodata != ref_nodata { + return exec_err!( + "RS_BandToDim: band {i} has different nodata value than band 0" + ); + } + } + + // Build new dim_names: [new_dim_name] + original_dim_names + let mut new_dim_names: Vec<&str> = Vec::with_capacity(ref_dim_names.len() + 1); + new_dim_names.push(name); + new_dim_names.extend(ref_dim_names.iter()); + + // Build new shape: [num_bands] + original_shape + let mut new_shape: Vec = Vec::with_capacity(ref_shape.len() + 1); + new_shape.push(num_bands as u64); + new_shape.extend_from_slice(&ref_shape); + + // Concatenate all band data + let mut concat_data = Vec::new(); + for i in 0..num_bands { + let band = raster.band(i).map_err(|e| arrow_datafusion_err!(e))?; + let data = band.contiguous_data()?; + concat_data.extend_from_slice(&data); + } + + let nodata = ref_nodata.as_deref(); + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + let spatial_dims = raster.spatial_dims(); + new_builder.start_raster_nd( + &t, + &spatial_dims, + raster.spatial_shape(), + raster.crs(), + )?; + new_builder.start_band_nd( + None, + &new_dim_names, + &new_shape, + ref_data_type, + nodata, + None, + None, + )?; + new_builder.band_data_writer().append_value(&concat_data); + new_builder.finish_band()?; + new_builder.finish_raster()?; + + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StructArray; + use arrow_schema::DataType; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use sedona_raster::array::RasterStructArray; + use sedona_raster::builder::RasterBuilder; + use sedona_raster::traits::RasterRef; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 3D raster with 1 band, shape [time, y, x], + /// and sequential UInt8 data. + fn build_3d_raster_sequential(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd( + &transform, + &["x", "y"], + &[width as i64, height as i64], + None, + ) + .unwrap(); + builder + .start_band_nd( + Some("temp"), + &["time", "y", "x"], + &[time, height, width], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let total = (time * height * width) as usize; + let data: Vec = (0..total).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a single-row 2D raster with N bands, each [y, x], with + /// sequential data starting at `band_idx * y * x`. + fn build_multi_band_2d(num_bands: usize, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd( + &transform, + &["x", "y"], + &[width as i64, height as i64], + None, + ) + .unwrap(); + let pixels = (height * width) as usize; + for b in 0..num_bands { + builder + .start_band_nd( + None, + &["y", "x"], + &[height, width], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let offset = b * pixels; + let data: Vec = (offset..offset + pixels).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + } + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + // ----------------------------------------------------------------------- + // RS_DimToBand + // ----------------------------------------------------------------------- + + #[test] + fn dimtoband_3d_to_bands() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // 1 band with shape [time=3, y=2, x=2], sequential data 0..12 + let rasters = build_3d_raster_sequential(3, 2, 2); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should have 3 bands, each 2D [y=2, x=2] + assert_eq!(raster.num_bands(), 3); + + for b in 0..3 { + let band = raster.band(b).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[2, 2]); + + let data = band.contiguous_data().unwrap(); + let offset = b * 4; + let expected: Vec = (offset..offset + 4).map(|i| i as u8).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + // Verify band names + assert_eq!(raster.band_name(0), Some("temp_time_0")); + assert_eq!(raster.band_name(1), Some("temp_time_1")); + assert_eq!(raster.band_name(2), Some("temp_time_2")); + } + + #[test] + fn dimtoband_spatial_dim_error() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_3d_raster_sequential(3, 2, 2); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot expand spatial dimension"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn dimtoband_null_raster() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } + + // ----------------------------------------------------------------------- + // RS_BandToDim + // ----------------------------------------------------------------------- + + #[test] + fn bandtodim_bands_to_3d() { + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // 3 bands, each [y=2, x=2], sequential data + let rasters = build_multi_band_2d(3, 2, 2); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "newdim") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should be 1 band with shape [newdim=3, y=2, x=2] + assert_eq!(raster.num_bands(), 1); + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["newdim", "y", "x"]); + assert_eq!(band.shape(), &[3, 2, 2]); + + // Data should be concatenation of all 3 bands + let data = band.contiguous_data().unwrap(); + let expected: Vec = (0..12).map(|i| i as u8).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn bandtodim_mismatched_shapes_error() { + // Build a raster whose bands disagree along a non-spatial axis. + // Both bands satisfy the raster's spatial extent (y=2, x=2) so the + // builder accepts them, but their time-axis sizes differ — that is + // what RS_BandToDim must reject. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[3, 2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 12]); + builder.finish_band().unwrap(); + + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[4, 2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 16]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + let rasters = builder.finish().unwrap(); + + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // Use a new dim name ("batch") that doesn't collide with the bands' + // existing dim names so the dim-collision check passes and we reach + // the shape-mismatch check. + let result = tester.invoke_array_scalar(Arc::new(rasters), "batch"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("different shape"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn bandtodim_mismatched_nodata_error() { + // Two bands match on dim_names/shape/data_type but disagree on + // nodata sentinel. RS_BandToDim must reject — collapsing them into + // one output band would silently inherit band 0's nodata and lose + // information about band 1's sentinel. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + Some(&[0u8]), + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 4]); + builder.finish_band().unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + Some(&[255u8]), + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 4]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let rasters = builder.finish().unwrap(); + + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + let result = tester.invoke_array_scalar(Arc::new(rasters), "stack"); + let err = result.unwrap_err().to_string(); + assert!(err.contains("different nodata"), "Unexpected error: {err}"); + } + + #[test] + fn bandtodim_new_dim_collides_with_existing_dim_error() { + // RS_BandToDim with a new dim name that already exists on the input + // bands would build dim_names with duplicates (e.g. ["y", "y", "x"]) + // and break the RS_DimToBand round-trip. Must reject up front. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + for _ in 0..2 { + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 4]); + builder.finish_band().unwrap(); + } + builder.finish_raster().unwrap(); + let rasters = builder.finish().unwrap(); + + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + // Try to create a new dim named "y" — collides with the existing y axis. + let result = tester.invoke_array_scalar(Arc::new(rasters), "y"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("dimension 'y' already exists"), + "Unexpected error: {err}" + ); + } + + #[test] + fn dimtoband_errors_when_no_band_has_dim() { + // Pre-flight: typo'd dim name surfaces as an explicit error rather + // than silently passing every band through unchanged. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value([0u8; 4]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let rasters = builder.finish().unwrap(); + + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + let result = tester.invoke_array_scalar(Arc::new(rasters), "nonexistent"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("no band has dimension 'nonexistent'"), + "Unexpected error: {err}" + ); + } + + #[test] + fn bandtodim_null_raster() { + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } + + // ----------------------------------------------------------------------- + // Round-trip: DimToBand -> BandToDim + // ----------------------------------------------------------------------- + + #[test] + fn round_trip_dimtoband_bandtodim() { + // Start with 1 band [time=3, y=2, x=2] + let rasters = build_3d_raster_sequential(3, 2, 2); + let raster_array_in = RasterStructArray::new(&rasters); + let raster_in = raster_array_in.get(0).unwrap(); + let original_data = raster_in + .band(0) + .unwrap() + .contiguous_data() + .unwrap() + .to_vec(); + + // DimToBand: 1 band [time=3, y=2, x=2] -> 3 bands [y=2, x=2] + let kernel = RsDimToBand {}; + let arg_types = vec![RASTER, SedonaType::Arrow(DataType::Utf8)]; + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ]; + let mid_result = kernel.invoke_batch(&arg_types, &args).unwrap(); + let mid_array = match mid_result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array"), + }; + + // BandToDim: 3 bands [y=2, x=2] -> 1 band [time=3, y=2, x=2] + let kernel2 = RsBandToDim {}; + let args2 = vec![ + ColumnarValue::Array(mid_array), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ]; + let final_result = kernel2.invoke_batch(&arg_types, &args2).unwrap(); + let final_array = match final_result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array"), + }; + let final_struct = final_array.as_any().downcast_ref::().unwrap(); + let raster_array_out = RasterStructArray::new(final_struct); + let raster_out = raster_array_out.get(0).unwrap(); + + // Verify shape and data match + let band_out = raster_out.band(0).unwrap(); + assert_eq!(band_out.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band_out.shape(), &[3, 2, 2]); + let round_trip_data = band_out.contiguous_data().unwrap(); + assert_eq!(round_trip_data.as_ref(), &original_data[..]); + } +} diff --git a/rust/sedona-raster-functions/src/rs_dimensions.rs b/rust/sedona-raster-functions/src/rs_dimensions.rs new file mode 100644 index 000000000..95a7b0083 --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_dimensions.rs @@ -0,0 +1,1042 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::builder::{Int32Builder, Int64Builder, ListBuilder, StringBuilder}; +use arrow_schema::DataType; +use datafusion_common::cast::{as_int32_array, as_string_array}; +use datafusion_common::error::Result; +use datafusion_common::{arrow_datafusion_err, exec_err}; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::traits::RasterRef; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Check that all bands agree on a value. Returns the value from band 0, +/// or an error if any band disagrees. +fn check_band_agreement( + raster: &dyn RasterRef, + func_name: &str, + property_name: &str, + extractor: impl Fn(&dyn sedona_raster::traits::BandRef) -> T, +) -> Result { + if raster.num_bands() == 0 { + return exec_err!("{func_name}: raster has no bands"); + } + let band0 = raster.band(0).map_err(|e| arrow_datafusion_err!(e))?; + let value = extractor(band0.as_ref()); + for i in 1..raster.num_bands() { + if let Ok(band) = raster.band(i) { + let other = extractor(band.as_ref()); + if other != value { + return exec_err!( + "{func_name}: bands have different {property_name} — specify a band index" + ); + } + } + } + Ok(value) +} + +fn list_utf8_type() -> DataType { + DataType::List(Arc::new(arrow_schema::Field::new( + "item", + DataType::Utf8, + true, + ))) +} + +fn list_int64_type() -> DataType { + DataType::List(Arc::new(arrow_schema::Field::new( + "item", + DataType::Int64, + true, + ))) +} + +// =========================================================================== +// RS_NumDimensions +// =========================================================================== + +/// RS_NumDimensions(raster [, band]) -> Int32 +/// +/// Returns the number of dimensions in the raster (or a specific band). +pub fn rs_numdimensions_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_numdimensions", + vec![ + Arc::new(RsNumDimensions {}), + Arc::new(RsNumDimensionsWithBand {}), + ], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsNumDimensions {} + +impl SedonaScalarKernel for RsNumDimensions { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::Int32), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + builder.append_null(); + Ok(()) + } + Some(raster) => { + let ndim = + check_band_agreement(raster, "RS_NumDimensions", "dimensionality", |b| { + b.ndim() + })?; + builder.append_value(ndim as i32); + Ok(()) + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct RsNumDimensionsWithBand {} + +impl SedonaScalarKernel for RsNumDimensionsWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(DataType::Int32), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + builder.append_null(); + return Ok(()); + } + let band = raster + .band((band_index - 1) as usize) + .map_err(|e| arrow_datafusion_err!(e))?; + builder.append_value(band.ndim() as i32); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +// =========================================================================== +// RS_DimNames +// =========================================================================== + +/// RS_DimNames(raster [, band]) -> List +/// +/// Returns the dimension names of the raster (or a specific band). +pub fn rs_dimnames_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimnames", + vec![Arc::new(RsDimNames {}), Arc::new(RsDimNamesWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimNames {} + +impl SedonaScalarKernel for RsDimNames { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(list_utf8_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut list_builder = ListBuilder::new(StringBuilder::new()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + let names = check_band_agreement(raster, "RS_DimNames", "dimension names", |b| { + b.dim_names() + .iter() + .map(|s| s.to_string()) + .collect::>() + })?; + for name in &names { + list_builder.values().append_value(name); + } + list_builder.append(true); + Ok(()) + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +#[derive(Debug)] +struct RsDimNamesWithBand {} + +impl SedonaScalarKernel for RsDimNamesWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(list_utf8_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut list_builder = ListBuilder::new(StringBuilder::new()); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + list_builder.append_null(); + return Ok(()); + } + let band = raster + .band((band_index - 1) as usize) + .map_err(|e| arrow_datafusion_err!(e))?; + for name in band.dim_names() { + list_builder.values().append_value(name); + } + list_builder.append(true); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +// =========================================================================== +// RS_DimSize +// =========================================================================== + +/// RS_DimSize(raster, dim_name [, band]) -> Int64 (nullable) +/// +/// Returns the size of the named dimension, or null if the dimension +/// does not exist. +pub fn rs_dimsize_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimsize", + vec![Arc::new(RsDimSize {}), Arc::new(RsDimSizeWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimSize {} + +impl SedonaScalarKernel for RsDimSize { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Arrow(DataType::Int64), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut builder = Int64Builder::with_capacity(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + builder.append_null(); + Ok(()) + } + (Some(raster), Some(name)) => { + // Pass-through for bands that don't carry the named + // dimension: filter to bands that have it, then require + // those to agree on the size. If no band has the dim at + // all, error explicitly — `check_band_agreement` on the + // raw `Option` extractor would silently return NULL + // and a mixed Some/None set would report a misleading + // "different dimension sizes" error. + let mut iter = (0..raster.num_bands()).filter_map(|i| { + let band = raster.band(i).ok()?; + let size = band.dim_size(name)?; + Some((i, size)) + }); + let Some((first_idx, first_size)) = iter.next() else { + return exec_err!("RS_DimSize: no band has dimension '{name}'"); + }; + for (other_idx, other_size) in iter { + if other_size != first_size { + return exec_err!( + "RS_DimSize: bands have different sizes for dimension '{name}' \ + (band {first_idx} has {first_size}, band {other_idx} has \ + {other_size}) — specify a band index" + ); + } + } + builder.append_value(first_size as i64); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct RsDimSizeWithBand {} + +impl SedonaScalarKernel for RsDimSizeWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ], + SedonaType::Arrow(DataType::Int64), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + let band_index_array = args[2].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut builder = Int64Builder::with_capacity(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + builder.append_null(); + Ok(()) + } + (Some(raster), Some(name)) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + builder.append_null(); + return Ok(()); + } + let band = raster + .band((band_index - 1) as usize) + .map_err(|e| arrow_datafusion_err!(e))?; + match band.dim_size(name) { + Some(s) => builder.append_value(s as i64), + None => builder.append_null(), + } + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +// =========================================================================== +// RS_Shape +// =========================================================================== + +/// RS_Shape(raster [, band]) -> List +/// +/// Returns the shape (size of each dimension) of the raster. +pub fn rs_shape_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_shape", + vec![Arc::new(RsShape {}), Arc::new(RsShapeWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsShape {} + +impl SedonaScalarKernel for RsShape { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(list_int64_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut list_builder = ListBuilder::new(Int64Builder::new()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + let shape = + check_band_agreement(raster, "RS_Shape", "shape", |b| b.shape().to_vec())?; + for &s in &shape { + list_builder.values().append_value(s as i64); + } + list_builder.append(true); + Ok(()) + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +#[derive(Debug)] +struct RsShapeWithBand {} + +impl SedonaScalarKernel for RsShapeWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(list_int64_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut list_builder = ListBuilder::new(Int64Builder::new()); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + list_builder.append_null(); + return Ok(()); + } + let band = raster + .band((band_index - 1) as usize) + .map_err(|e| arrow_datafusion_err!(e))?; + for &s in band.shape() { + list_builder.values().append_value(s as i64); + } + list_builder.append(true); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, Int32Array, ListArray, StringArray, StructArray}; + use datafusion_expr::ScalarUDF; + use sedona_raster::builder::RasterBuilder; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 2D raster StructArray. + fn build_2d_raster(width: u64, height: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(width, height, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::Float32, None).unwrap(); + let data = vec![0u8; (width * height * 4) as usize]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a single-row 3D raster StructArray with shape [time, height, width]. + fn build_3d_raster(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd( + &transform, + &["x", "y"], + &[width as i64, height as i64], + None, + ) + .unwrap(); + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[time, height, width], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; (time * height * width * 4) as usize]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a raster where two bands both carry a `time` dimension but + /// disagree on its size. Used to exercise the "filtered bands disagree" + /// error path in `RS_DimSize`. + fn build_conflicting_time_size_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); + + // Band 0: time=3 + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 3 * 4 * 5 * 4]); + builder.finish_band().unwrap(); + + // Band 1: time=7 (disagrees with band 0) + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[7, 4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 7 * 4 * 5 * 4]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a raster with two bands that have different dimensionality. + fn build_mixed_dim_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); + + // Band 0: 2D [4, 5] + builder + .start_band_nd( + None, + &["y", "x"], + &[4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 4 * 5 * 4]); + builder.finish_band().unwrap(); + + // Band 1: 3D [3, 4, 5] + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 3 * 4 * 5 * 4]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + // ----------------------------------------------------------------------- + // RS_NumDimensions + // ----------------------------------------------------------------------- + + #[test] + fn numdimensions_2d() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(DataType::Int32); + + let rasters = build_2d_raster(4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int32Array"); + assert_eq!(arr.value(0), 2); + } + + #[test] + fn numdimensions_3d() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int32Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn numdimensions_with_band() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int32)]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar(Arc::new(rasters), 1_i32) + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int32Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn numdimensions_null_raster() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn numdimensions_mixed_bands_error() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different dimensionality"), + "Unexpected error: {err_msg}" + ); + } + + // ----------------------------------------------------------------------- + // RS_DimNames + // ----------------------------------------------------------------------- + + #[test] + fn dimnames_2d() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(list_utf8_type()); + + let rasters = build_2d_raster(4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let str_arr = values + .as_any() + .downcast_ref::() + .expect("Expected StringArray"); + assert_eq!(str_arr.len(), 2); + assert_eq!(str_arr.value(0), "y"); + assert_eq!(str_arr.value(1), "x"); + } + + #[test] + fn dimnames_3d() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let str_arr = values + .as_any() + .downcast_ref::() + .expect("Expected StringArray"); + assert_eq!(str_arr.len(), 3); + assert_eq!(str_arr.value(0), "time"); + assert_eq!(str_arr.value(1), "y"); + assert_eq!(str_arr.value(2), "x"); + } + + #[test] + fn dimnames_null_raster() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn dimnames_mixed_bands_error() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different dimension names"), + "Unexpected error: {err_msg}" + ); + } + + // ----------------------------------------------------------------------- + // RS_DimSize + // ----------------------------------------------------------------------- + + #[test] + fn dimsize_2d_x() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_2d_raster(5, 4); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x").unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 5); + } + + #[test] + fn dimsize_3d_time() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn dimsize_nonexistent_errors_when_no_band_has_dim() { + // RS_DimSize on a dim that no band carries surfaces an explicit + // error rather than silently returning NULL. Catches typo'd dim + // names. + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_2d_raster(4, 5); + let result = tester.invoke_array_scalar(Arc::new(rasters), "nonexistent"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("no band has dimension 'nonexistent'"), + "Unexpected error: {err}" + ); + } + + #[test] + fn dimsize_with_band() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int32), + ], + ); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 1_i32) + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn dimsize_null_raster() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x").unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn dimsize_errors_when_bands_with_dim_disagree_on_size() { + // Pass-through skips bands missing the dim, but the bands that DO + // have it must agree on the size — otherwise the answer is + // ambiguous and the user must specify a band index. + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_conflicting_time_size_raster(); + let result = tester.invoke_array_scalar(Arc::new(rasters), "time"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("bands have different sizes for dimension 'time'"), + "Unexpected error: {err}" + ); + } + + #[test] + fn dimsize_passes_through_bands_without_dim() { + // RS_DimSize on a heterogeneous raster: bands without the named + // dimension are skipped; the size is read from the bands that have + // it. Mirrors RS_Slice / RS_SliceRange / RS_DimToBand pass-through + // semantics and matches xarray's `ds.sizes`. + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // build_mixed_dim_raster: band 0 is 2D (no `time`), band 1 is 3D + // with `time=3`. RS_DimSize(raster, 'time') reports the size from + // band 1. + let rasters = build_mixed_dim_raster(); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 3); + } + + // ----------------------------------------------------------------------- + // RS_Shape + // ----------------------------------------------------------------------- + + #[test] + fn shape_2d() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(list_int64_type()); + + let rasters = build_2d_raster(5, 4); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let int_arr = values + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(int_arr.len(), 2); + assert_eq!(int_arr.value(0), 4); // height + assert_eq!(int_arr.value(1), 5); // width + } + + #[test] + fn shape_3d() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let int_arr = values + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(int_arr.len(), 3); + assert_eq!(int_arr.value(0), 3); // time + assert_eq!(int_arr.value(1), 4); // height + assert_eq!(int_arr.value(2), 5); // width + } + + #[test] + fn shape_null_raster() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn shape_mixed_bands_error() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different shape"), + "Unexpected error: {err_msg}" + ); + } +} diff --git a/rust/sedona-raster-functions/src/rs_slice.rs b/rust/sedona-raster-functions/src/rs_slice.rs new file mode 100644 index 000000000..4109ab0e3 --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_slice.rs @@ -0,0 +1,874 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use datafusion_common::cast::{as_int64_array, as_string_array}; +use datafusion_common::error::Result; +use datafusion_common::{arrow_datafusion_err, exec_err}; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_raster::traits::{BandRef, RasterRef}; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; + +// =========================================================================== +// RS_Slice +// =========================================================================== + +/// RS_Slice(raster, dim_name, index) -> Raster +/// +/// Slices each band along the named dimension at the given index, removing +/// that dimension from the output. Spatial dimensions cannot be sliced. +pub fn rs_slice_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_slice", + vec![Arc::new(RsSlice {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsSlice {} + +impl SedonaScalarKernel for RsSlice { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let index_array = args[2].clone().cast_to(&DataType::Int64, None)?; + let index_array = index_array.into_array(executor.num_iterations())?; + let index_array = as_int64_array(&index_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut index_iter = index_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let index = index_iter.next().unwrap(); + + match (raster_opt, dim_name, index) { + (None, _, _) | (_, None, _) | (_, _, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name), Some(idx)) => { + if idx < 0 { + return exec_err!( + "RS_Slice: index must be non-negative, got {idx}" + ); + } + let idx = idx as u64; + validate_not_spatial(raster, name, "RS_Slice")?; + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + let spatial_dims = raster.spatial_dims(); + new_builder.start_raster_nd(&t, &spatial_dims, raster.spatial_shape(), raster.crs())?; + + require_any_band_has_dim(raster, name, "RS_Slice")?; + + for band_idx in 0..raster.num_bands() { + let band = raster + .band(band_idx) + .map_err(|e| arrow_datafusion_err!(e))?; + + // Pass-through: bands that don't carry the named + // dimension are emitted unchanged. Same convention as + // RS_DimToBand, and matches xarray's `isel` — variables + // without the indexed dim are left alone. + let Some(dim_idx) = band.dim_index(name) else { + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let band_name = raster.band_name(band_idx); + new_builder.start_band_nd( + band_name, + &dim_name_refs, + band.shape(), + band.data_type(), + band.nodata(), + None, + None, + )?; + let data = band.contiguous_data()?; + new_builder.band_data_writer().append_value(&data); + new_builder.finish_band()?; + continue; + }; + + let shape = band.shape(); + if idx >= shape[dim_idx] { + return exec_err!( + "RS_Slice: index {idx} out of range for dimension '{name}' with size {}", + shape[dim_idx] + ); + } + + let new_dim_names: Vec<&str> = band + .dim_names() + .into_iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, n)| n) + .collect(); + let new_shape: Vec = shape + .iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, &s)| s) + .collect(); + + let sliced_data = + extract_slice(band.as_ref(), dim_idx, idx, 1)?; + + let band_name = raster.band_name(band_idx); + let new_dim_name_refs: Vec<&str> = + new_dim_names.to_vec(); + new_builder.start_band_nd( + band_name, + &new_dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// RS_SliceRange +// =========================================================================== + +/// RS_SliceRange(raster, dim_name, start, end) -> Raster +/// +/// Narrows each band along the named dimension to the half-open range +/// `[start, end)`, keeping the dimension in the output with reduced size. +pub fn rs_slicerange_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_slicerange", + vec![Arc::new(RsSliceRange {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsSliceRange {} + +impl SedonaScalarKernel for RsSliceRange { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ArgMatcher::is_integer(), + ], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let start_array = args[2].clone().cast_to(&DataType::Int64, None)?; + let start_array = start_array.into_array(executor.num_iterations())?; + let start_array = as_int64_array(&start_array)?; + + let end_array = args[3].clone().cast_to(&DataType::Int64, None)?; + let end_array = end_array.into_array(executor.num_iterations())?; + let end_array = as_int64_array(&end_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut start_iter = start_array.iter(); + let mut end_iter = end_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let start = start_iter.next().unwrap(); + let end = end_iter.next().unwrap(); + + match (raster_opt, dim_name, start, end) { + (None, _, _, _) | (_, None, _, _) | (_, _, None, _) | (_, _, _, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name), Some(start_val), Some(end_val)) => { + if start_val < 0 { + return exec_err!( + "RS_SliceRange: start must be non-negative, got {start_val}" + ); + } + if end_val < 0 { + return exec_err!( + "RS_SliceRange: end must be non-negative, got {end_val}" + ); + } + let start_val = start_val as u64; + let end_val = end_val as u64; + validate_not_spatial(raster, name, "RS_SliceRange")?; + + if start_val >= end_val { + return exec_err!( + "RS_SliceRange: start ({start_val}) must be less than end ({end_val})" + ); + } + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + let spatial_dims = raster.spatial_dims(); + new_builder.start_raster_nd(&t, &spatial_dims, raster.spatial_shape(), raster.crs())?; + + require_any_band_has_dim(raster, name, "RS_SliceRange")?; + + for band_idx in 0..raster.num_bands() { + let band = raster + .band(band_idx) + .map_err(|e| arrow_datafusion_err!(e))?; + + // Pass-through: bands that don't carry the named + // dimension are emitted unchanged. Same convention as + // RS_Slice and RS_DimToBand. + let Some(dim_idx) = band.dim_index(name) else { + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let band_name = raster.band_name(band_idx); + new_builder.start_band_nd( + band_name, + &dim_name_refs, + band.shape(), + band.data_type(), + band.nodata(), + None, + None, + )?; + let data = band.contiguous_data()?; + new_builder.band_data_writer().append_value(&data); + new_builder.finish_band()?; + continue; + }; + + let shape = band.shape(); + if end_val > shape[dim_idx] { + return exec_err!( + "RS_SliceRange: end ({end_val}) out of range for dimension '{name}' with size {}", + shape[dim_idx] + ); + } + + let range_len = end_val - start_val; + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let mut new_shape: Vec = shape.to_vec(); + new_shape[dim_idx] = range_len; + + let sliced_data = + extract_slice(band.as_ref(), dim_idx, start_val, range_len)?; + + let band_name = raster.band_name(band_idx); + new_builder.start_band_nd( + band_name, + &dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// Shared helpers +// =========================================================================== + +/// Validate that the dimension name is not a spatial dimension. +/// Verify that at least one band in `raster` carries the named dimension. +/// +/// Used as a pre-flight by the manipulation functions (`RS_Slice`, +/// `RS_SliceRange`, `RS_DimToBand`) so that a typo'd dimension name +/// surfaces as a clean error rather than silently passing every band +/// through unchanged. +pub(crate) fn require_any_band_has_dim( + raster: &dyn RasterRef, + name: &str, + func_name: &str, +) -> Result<()> { + let any_band_has_dim = (0..raster.num_bands()).any(|i| { + raster + .band(i) + .ok() + .and_then(|b| b.dim_index(name)) + .is_some() + }); + if !any_band_has_dim { + return exec_err!("{func_name}: no band has dimension '{name}'"); + } + Ok(()) +} + +pub(crate) fn validate_not_spatial( + raster: &dyn RasterRef, + dim_name: &str, + func_name: &str, +) -> Result<()> { + if dim_name == raster.x_dim() || dim_name == raster.y_dim() { + return exec_err!("{func_name}: cannot slice spatial dimension '{dim_name}'"); + } + Ok(()) +} + +/// Extract a slice of data from a band along a given dimension. +/// +/// For `count == 1`, this extracts a single index (used by RS_Slice). +/// For `count > 1`, this extracts a contiguous range `[start, start+count)` +/// (used by RS_SliceRange). +/// +/// The algorithm works on C-order (row-major) layout: +/// - `outer_count`: product of shape dimensions before `dim_idx` +/// - `inner_size`: product of shape dimensions after `dim_idx` * elem_size +/// - `stride`: `shape[dim_idx] * inner_size` (bytes between outer elements) +/// +/// For each outer element, we copy `count * inner_size` bytes starting at +/// `start * inner_size` within that stride. +pub(crate) fn extract_slice( + band: &dyn BandRef, + dim_idx: usize, + start: u64, + count: u64, +) -> Result> { + let shape = band.shape(); + let elem_size = band.data_type().byte_size() as u64; + let data = band.contiguous_data()?; + + let outer_count: u64 = shape[..dim_idx].iter().product(); + let inner_size: u64 = shape[dim_idx + 1..].iter().product::() * elem_size; + let stride = shape[dim_idx] * inner_size; + let copy_size = (count * inner_size) as usize; + let offset_within_stride = start * inner_size; + + let total_output = (outer_count as usize) * copy_size; + let mut output = Vec::with_capacity(total_output); + + for outer in 0..outer_count { + let src_start = (outer * stride + offset_within_stride) as usize; + output.extend_from_slice(&data[src_start..src_start + copy_size]); + } + + Ok(output) +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StructArray; + use arrow_schema::DataType; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use sedona_raster::array::RasterStructArray; + use sedona_raster::builder::RasterBuilder; + use sedona_raster::traits::RasterRef; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 3D raster with shape [time, height, width] and + /// sequential UInt8 data so we can verify slicing correctness. + fn build_3d_raster_sequential(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd( + &transform, + &["x", "y"], + &[width as i64, height as i64], + None, + ) + .unwrap(); + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[time, height, width], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let total = (time * height * width) as usize; + let data: Vec = (0..total).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a single-row raster with two bands of different + /// dimensionality, used to exercise pass-through behavior. + /// Band 0: 2D `[y=2, x=3]`, UInt8 sequential 0..6. + /// Band 1: 3D `[time=3, y=2, x=3]`, UInt8 sequential 0..18. + fn build_mixed_dim_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3i64, 2], None) + .unwrap(); + // Band 0: 2D, no `time` axis. + builder + .start_band_nd( + None, + &["y", "x"], + &[2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value((0u8..6).collect::>()); + builder.finish_band().unwrap(); + // Band 1: 3D with `time`. + builder + .start_band_nd( + None, + &["time", "y", "x"], + &[3, 2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value((0u8..18).collect::>()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + #[test] + fn slice_passes_through_bands_without_dim() { + // RS_Slice on a heterogeneous raster: bands that don't carry the + // named dim are emitted unchanged; bands that do are sliced. + // Matches xarray's `ds.isel({dim: i})`. + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_mixed_dim_raster(); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 1_i64) + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + assert_eq!(raster.num_bands(), 2); + + // Band 0 had no `time` axis — passes through unchanged: 2-D [2, 3]. + let band0 = raster.band(0).unwrap(); + assert_eq!(band0.dim_names(), vec!["y", "x"]); + assert_eq!(band0.shape(), &[2, 3]); + let band0_data = band0.contiguous_data().unwrap(); + assert_eq!(band0_data.as_ref(), &(0u8..6).collect::>()[..]); + + // Band 1 had `time` — sliced to time=1: 2-D [2, 3], bytes 6..12. + let band1 = raster.band(1).unwrap(); + assert_eq!(band1.dim_names(), vec!["y", "x"]); + assert_eq!(band1.shape(), &[2, 3]); + let band1_data = band1.contiguous_data().unwrap(); + assert_eq!(band1_data.as_ref(), &(6u8..12).collect::>()[..]); + } + + #[test] + fn slice_errors_when_no_band_has_dim() { + // Pre-flight error: if no band carries the named dim, the typo'd + // name surfaces as an explicit error rather than silently passing + // every band through. + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "nonexistent", 0_i64); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("no band has dimension 'nonexistent'"), + "Unexpected error: {err}" + ); + } + + #[test] + fn slicerange_passes_through_bands_without_dim() { + // Same pass-through semantics for RS_SliceRange. + let kernel = RsSliceRange {}; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + SedonaType::Arrow(DataType::Int64), + ]; + let rasters = build_mixed_dim_raster(); + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + ]; + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + let result_struct = match result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array result"), + }; + let result_struct = result_struct + .as_any() + .downcast_ref::() + .unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + assert_eq!(raster.num_bands(), 2); + + // Band 0 had no `time` — passes through unchanged. + let band0 = raster.band(0).unwrap(); + assert_eq!(band0.shape(), &[2, 3]); + + // Band 1 had `time` — narrowed to [1, 3): 2 time steps, shape [2, 2, 3]. + let band1 = raster.band(1).unwrap(); + assert_eq!(band1.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band1.shape(), &[2, 2, 3]); + } + + #[test] + fn slicerange_errors_when_no_band_has_dim() { + let kernel = RsSliceRange {}; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + SedonaType::Arrow(DataType::Int64), + ]; + let rasters = build_3d_raster_sequential(3, 4, 5); + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("nonexistent".to_string()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(0))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), + ]; + let result = kernel.invoke_batch(&arg_types, &args); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("no band has dimension 'nonexistent'"), + "Unexpected error: {err}" + ); + } + + #[test] + fn slice_negative_index_error() { + // Negative index used to silently cast through `as u64`, producing + // a misleading "out of range with size N" error. + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "time", -1_i64); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("index must be non-negative"), + "Unexpected error: {err}" + ); + } + + #[test] + fn slicerange_negative_start_or_end_error() { + let kernel = RsSliceRange {}; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + SedonaType::Arrow(DataType::Int64), + ]; + let rasters = build_3d_raster_sequential(3, 4, 5); + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(-1))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ]; + let result = kernel.invoke_batch(&arg_types, &args); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("start must be non-negative"), + "Unexpected error: {err}" + ); + } + + #[test] + fn slice_out_of_bounds_on_band_that_has_dim_in_heterogeneous_raster() { + // Heterogeneous raster: pre-flight passes (band 1 has `time`), but + // the index is out of range for band 1. The error must still fire + // — pass-through doesn't swallow bounds errors. + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + // build_mixed_dim_raster: band 0 has no time, band 1 has time=3. + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "time", 5_i64); + let err = result.unwrap_err().to_string(); + assert!(err.contains("out of range"), "Unexpected error: {err}"); + } + + #[test] + fn slice_3d_on_time() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + // shape [time=3, y=4, x=5], sequential data 0..60 + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 1_i64) + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should now be 2D: [y=4, x=5] + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[4, 5]); + + // Data should be time slice 1: bytes 20..40 of original + let data = band.contiguous_data().unwrap(); + let expected: Vec = (20..40).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn slicerange_3d_on_time() { + let kernel = RsSliceRange {}; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + SedonaType::Arrow(DataType::Int64), + ]; + + // shape [time=3, y=4, x=5], sequential data 0..60 + let rasters = build_3d_raster_sequential(3, 4, 5); + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(0))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ]; + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + let result_struct = match result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array result"), + }; + let result_struct = result_struct + .as_any() + .downcast_ref::() + .unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should still be 3D: [time=2, y=4, x=5] + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band.shape(), &[2, 4, 5]); + + // Data should be first 2 time slices: bytes 0..40 + let data = band.contiguous_data().unwrap(); + let expected: Vec = (0..40).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn slice_spatial_dim_error() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_3d_raster_sequential(3, 4, 5); + + // Try to slice "x" + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters.clone()), "x", 0_i64); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot slice spatial dimension"), + "Unexpected error: {err_msg}" + ); + + // Try to slice "y" + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "y", 0_i64); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot slice spatial dimension"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn slice_index_out_of_range() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "time", 3_i64); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("out of range"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn slice_null_raster() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 0_i64) + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } +}