From dad6f8c341bf340a4041eb6e6ba8b50500ce07fe Mon Sep 17 00:00:00 2001 From: jameswillis Date: Mon, 4 May 2026 16:08:22 -0700 Subject: [PATCH 1/6] refactor(sedona-schema): canonical N-D raster schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces #787's 2D-only band schema with the canonical N-D schema: spatial_dims/spatial_shape at the raster level; bands carry dim_names, source_shape, nullable view, outdb_uri, outdb_format, plus the non-nullable data buffer. Removes nodata_value, storage_type, outdb_url, and outdb_band_id - every one is encodable in the new schema: - storage_type ↔ outdb_uri.is_null() (null = InDb, set = OutDbRef). - outdb_url ↔ outdb_uri (renamed only, same string). - outdb_band_id ↔ encoded inside outdb_uri (#band=N or GDAL native subdataset URI), parsed only inside the GDAL format driver. - nodata_value ↔ typed nodata: Binary (a null row means "no nodata"). Top-level adds spatial_dims: List<Utf8View> and spatial_shape: List<Int64>; nullable view is List<Struct<source_axis, start, step, steps>> where a null row encodes the canonical identity view. Note: intermediate commits in this PR are not expected to build; only the PR tip is CI-green. The trait, reader/builder, RS_* migration, and GDAL loader port land in subsequent commits. --- rust/sedona-schema/src/raster.rs | 439 +++++++++++++++++-------------- 1 file changed, 242 insertions(+), 197 deletions(-) diff --git a/rust/sedona-schema/src/raster.rs b/rust/sedona-schema/src/raster.rs index b5b8745c4..81ce42aa6 100644 --- a/rust/sedona-schema/src/raster.rs +++ b/rust/sedona-schema/src/raster.rs @@ -16,34 +16,54 @@ // under the License. use arrow_schema::{DataType, Field, FieldRef, Fields}; -/// Schema for storing raster data in Apache Arrow format. -/// Utilizing nested structs and lists to represent raster metadata and bands. +/// Schema for storing N-dimensional raster data in Apache Arrow format.
+/// +/// Each raster has a CRS, an affine transform, a list of spatial dimension +/// names (`spatial_dims`) and sizes (`spatial_shape`), and a list of bands. +/// Each band is an N-D chunk with named dimensions, a `source_shape` +/// describing the natural extent of its underlying buffer, and a `view` +/// describing the visible region of that buffer. +/// +/// `spatial_dims` + `spatial_shape` are the raster-level source of truth for +/// the spatial grid — today length 2 (`["x","y"]`, `[width, height]`), +/// Z-ready for a future 3D phase. All bands must contain every name in +/// `spatial_dims` in their own `dim_names`, with the band's *visible* size +/// for that dim matching `spatial_shape`. +/// +/// Legacy 2D rasters are represented as bands with `dim_names=["y","x"]` and +/// `source_shape=[height, width]`. #[derive(Debug, PartialEq, Clone)] pub struct RasterSchema; + impl RasterSchema { /// Returns the top-level fields for the raster schema structure. pub fn fields() -> Fields { Fields::from(vec![ - Field::new(column::METADATA, Self::metadata_type(), false), Field::new(column::CRS, Self::crs_type(), true), // Optional: may be inferred from data + Field::new(column::TRANSFORM, Self::transform_type(), false), + Field::new(column::SPATIAL_DIMS, Self::spatial_dims_type(), false), + Field::new(column::SPATIAL_SHAPE, Self::spatial_shape_type(), false), Field::new(column::BANDS, Self::bands_type(), true), ]) } - /// Raster metadata schema - pub fn metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - // Raster dimensions - Field::new(column::WIDTH, DataType::UInt64, false), - Field::new(column::HEIGHT, DataType::UInt64, false), - // Geospatial transformation parameters - Field::new(column::UPPERLEFT_X, DataType::Float64, false), - Field::new(column::UPPERLEFT_Y, DataType::Float64, false), - Field::new(column::SCALE_X, DataType::Float64, false), - Field::new(column::SCALE_Y, DataType::Float64, false), - Field::new(column::SKEW_X, 
DataType::Float64, false), - Field::new(column::SKEW_Y, DataType::Float64, false), - ])) + /// Affine transform schema — 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + pub fn transform_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Float64, false))) + } + + /// Spatial dimension names schema — list of `Utf8View` strings, one per + /// spatial axis. Today always `["x","y"]`; becomes `["x","y","z"]` if a + /// future phase adds Z support. + pub fn spatial_dims_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Utf8View, false))) + } + + /// Spatial shape schema — list of `Int64` sizes in the same order as + /// `spatial_dims`. Today `[width, height]`. + pub fn spatial_shape_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Int64, false))) } /// Bands list schema @@ -55,29 +75,59 @@ impl RasterSchema { ))) } - /// Individual band schema + /// Individual band schema — flattened N-D band with dimension metadata. + /// + /// Out-of-band ("outdb") bands carry two orthogonal identifiers: + /// - `outdb_uri` is the *location* (what scheme/registry to dispatch to, + /// e.g. `s3://bucket/file.tif`, `file:///…`, `mem://…`). + /// - `outdb_format` is the *format* (how to interpret the bytes, e.g. + /// `"geotiff"`, `"zarr"`). Null format means in-memory — the band's + /// `data` buffer is authoritative. 
pub fn band_type() -> DataType { DataType::Struct(Fields::from(vec![ - Field::new(column::METADATA, Self::band_metadata_type(), false), - Field::new(column::DATA, Self::band_data_type(), false), + Field::new(column::NAME, DataType::Utf8, true), + Field::new(column::DIM_NAMES, Self::dim_names_type(), false), + Field::new(column::SOURCE_SHAPE, Self::source_shape_type(), false), + Field::new(column::DATATYPE, DataType::UInt32, false), + Field::new(column::NODATA, DataType::Binary, true), + Field::new(column::VIEW, Self::view_type(), true), + Field::new(column::OUTDB_URI, DataType::Utf8, true), + Field::new(column::OUTDB_FORMAT, DataType::Utf8View, true), + Field::new(column::DATA, DataType::BinaryView, false), ])) } - /// Band metadata schema - pub fn band_metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::NODATAVALUE, DataType::Binary, true), // Optional: null means no nodata value specified - Field::new(column::STORAGE_TYPE, DataType::UInt32, false), - Field::new(column::DATATYPE, DataType::UInt32, false), - // OutDb reference fields - only used when storage_type == OutDbRef - Field::new(column::OUTDB_URL, DataType::Utf8, true), - Field::new(column::OUTDB_BAND_ID, DataType::UInt32, true), - ])) + /// Dimension names list type + pub fn dim_names_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Utf8, false))) + } + + /// Source shape list type — the natural C-order extent of the band's + /// `data` buffer (or outdb-resolved source) per dimension. The *visible* + /// shape exposed to consumers is derived from `view`: + /// `[entry.steps for entry in view]`. + pub fn source_shape_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::UInt64, false))) } - /// Band data schema - stores the actual raster pixel data as a binary blob - pub fn band_data_type() -> DataType { - DataType::BinaryView + /// View list type — one entry per dimension in the band's *visible* + /// order. 
Each entry is a `(source_axis, start, step, steps)` quadruple + /// describing how the visible axis maps onto the band's source shape. + /// The field is nullable: a null view denotes the identity view + /// `[(i, 0, 1, source_shape[i]) for i in 0..ndim]` and is the canonical + /// representation for any band whose data has not been sliced. See + /// `RasterSchema` doc for full semantics. + pub fn view_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + "item", + DataType::Struct(Fields::from(vec![ + Field::new("source_axis", DataType::Int64, false), + Field::new("start", DataType::Int64, false), + Field::new("step", DataType::Int64, false), + Field::new("steps", DataType::Int64, false), + ])), + false, + ))) } /// Coordinate Reference System (CRS) schema - stores CRS as JSON string (PROJ or WKT format) @@ -116,6 +166,23 @@ impl BandDataType { } } + /// Try to convert from a u32 discriminant value. + pub fn try_from_u32(value: u32) -> Option { + match value { + 1 => Some(BandDataType::UInt8), + 2 => Some(BandDataType::UInt16), + 3 => Some(BandDataType::Int16), + 4 => Some(BandDataType::UInt32), + 5 => Some(BandDataType::Int32), + 6 => Some(BandDataType::Float32), + 7 => Some(BandDataType::Float64), + 8 => Some(BandDataType::UInt64), + 9 => Some(BandDataType::Int64), + 10 => Some(BandDataType::Int8), + _ => None, + } + } + /// Java/Sedona-compatible pixel type name (e.g. `"UNSIGNED_8BITS"`). pub fn pixel_type_name(&self) -> &'static str { match self { @@ -134,88 +201,60 @@ impl BandDataType { } } -/// Storage strategy for raster band data within Apache Arrow arrays. -/// -/// This enum defines how raster data is physically stored and accessed: -/// -/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. 
-/// - Self-contained, no external dependencies, fast access for small-medium rasters -/// - Increases Arrow array size, memory usage grows and copy times increase with raster size -/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) -/// -/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. -/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Requires external storage management, potential for broken references -/// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)] -pub enum StorageType { - InDb = 0, - OutDbRef = 1, -} - /// Hard-coded column indices for performant access to nested struct fields. /// These indices must match the exact order defined in the RasterSchema methods. /// /// Using compile-time constants avoids string lookups and provides type safety /// when accessing nested struct fields in Arrow arrays. 
-pub mod metadata_indices { - pub const WIDTH: usize = 0; - pub const HEIGHT: usize = 1; - pub const UPPERLEFT_X: usize = 2; - pub const UPPERLEFT_Y: usize = 3; - pub const SCALE_X: usize = 4; - pub const SCALE_Y: usize = 5; - pub const SKEW_X: usize = 6; - pub const SKEW_Y: usize = 7; -} - -pub mod band_metadata_indices { - pub const NODATAVALUE: usize = 0; - pub const STORAGE_TYPE: usize = 1; - pub const DATATYPE: usize = 2; - pub const OUTDB_URL: usize = 3; - pub const OUTDB_BAND_ID: usize = 4; +pub mod raster_indices { + pub const CRS: usize = 0; + pub const TRANSFORM: usize = 1; + pub const SPATIAL_DIMS: usize = 2; + pub const SPATIAL_SHAPE: usize = 3; + pub const BANDS: usize = 4; } pub mod band_indices { - pub const METADATA: usize = 0; - pub const DATA: usize = 1; + pub const NAME: usize = 0; + pub const DIM_NAMES: usize = 1; + pub const SOURCE_SHAPE: usize = 2; + pub const DATA_TYPE: usize = 3; + pub const NODATA: usize = 4; + pub const VIEW: usize = 5; + pub const OUTDB_URI: usize = 6; + pub const OUTDB_FORMAT: usize = 7; + pub const DATA: usize = 8; } -pub mod raster_indices { - pub const METADATA: usize = 0; - pub const CRS: usize = 1; - pub const BANDS: usize = 2; +/// Field indices within the `view` struct (`(source_axis, start, step, steps)`). +pub mod band_view_indices { + pub const SOURCE_AXIS: usize = 0; + pub const START: usize = 1; + pub const STEP: usize = 2; + pub const STEPS: usize = 3; } /// Column name constants used throughout the raster schema definition. /// These string constants ensure consistency across schema creation and field access. 
pub mod column { - pub const METADATA: &str = "metadata"; + // Top-level raster fields + pub const CRS: &str = "crs"; + pub const TRANSFORM: &str = "transform"; + pub const SPATIAL_DIMS: &str = "spatial_dims"; + pub const SPATIAL_SHAPE: &str = "spatial_shape"; pub const BANDS: &str = "bands"; pub const BAND: &str = "band"; - pub const DATA: &str = "data"; - - // Raster metadata fields - pub const WIDTH: &str = "width"; - pub const HEIGHT: &str = "height"; - pub const UPPERLEFT_X: &str = "upperleft_x"; - pub const UPPERLEFT_Y: &str = "upperleft_y"; - pub const SCALE_X: &str = "scale_x"; - pub const SCALE_Y: &str = "scale_y"; - pub const SKEW_X: &str = "skew_x"; - pub const SKEW_Y: &str = "skew_y"; - // Raster CRS field - pub const CRS: &str = "crs"; - // Band metadata fields - pub const NODATAVALUE: &str = "nodata_value"; - pub const STORAGE_TYPE: &str = "storage_type"; + // Band fields + pub const NAME: &str = "name"; + pub const DIM_NAMES: &str = "dim_names"; + pub const SOURCE_SHAPE: &str = "source_shape"; pub const DATATYPE: &str = "data_type"; - pub const OUTDB_URL: &str = "outdb_url"; - pub const OUTDB_BAND_ID: &str = "outdb_band_id"; + pub const NODATA: &str = "nodata"; + pub const VIEW: &str = "view"; + pub const OUTDB_URI: &str = "outdb_uri"; + pub const OUTDB_FORMAT: &str = "outdb_format"; + pub const DATA: &str = "data"; } #[cfg(test)] @@ -225,10 +264,12 @@ mod tests { #[test] fn test_raster_schema_fields() { let fields = RasterSchema::fields(); - assert_eq!(fields.len(), 3); - assert_eq!(fields[0].name(), column::METADATA); - assert_eq!(fields[1].name(), column::CRS); - assert_eq!(fields[2].name(), column::BANDS); + assert_eq!(fields.len(), 5); + assert_eq!(fields[0].name(), column::CRS); + assert_eq!(fields[1].name(), column::TRANSFORM); + assert_eq!(fields[2].name(), column::SPATIAL_DIMS); + assert_eq!(fields[3].name(), column::SPATIAL_SHAPE); + assert_eq!(fields[4].name(), column::BANDS); } /// Comprehensive test to verify all hard-coded indices match 
the actual schema. @@ -238,128 +279,90 @@ mod tests { fn test_hardcoded_indices_match_schema() { // Test raster-level indices let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 3, "Expected exactly 3 raster fields"); - assert_eq!( - raster_fields[raster_indices::METADATA].name(), - column::METADATA, - "Raster metadata index mismatch" - ); + assert_eq!(raster_fields.len(), 5, "Expected exactly 5 raster fields"); assert_eq!( raster_fields[raster_indices::CRS].name(), column::CRS, "Raster CRS index mismatch" ); + assert_eq!( + raster_fields[raster_indices::TRANSFORM].name(), + column::TRANSFORM, + "Raster TRANSFORM index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::SPATIAL_DIMS].name(), + column::SPATIAL_DIMS, + "Raster SPATIAL_DIMS index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::SPATIAL_SHAPE].name(), + column::SPATIAL_SHAPE, + "Raster SPATIAL_SHAPE index mismatch" + ); assert_eq!( raster_fields[raster_indices::BANDS].name(), column::BANDS, "Raster BANDS index mismatch" ); - // Test metadata indices - let metadata_type = RasterSchema::metadata_type(); - if let DataType::Struct(metadata_fields) = metadata_type { - assert_eq!( - metadata_fields.len(), - 8, - "Expected exactly 8 metadata fields" - ); - assert_eq!( - metadata_fields[metadata_indices::WIDTH].name(), - column::WIDTH, - "Metadata width index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::HEIGHT].name(), - column::HEIGHT, - "Metadata height index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_X].name(), - column::UPPERLEFT_X, - "Metadata upperleft_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_Y].name(), - column::UPPERLEFT_Y, - "Metadata upperleft_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_X].name(), - column::SCALE_X, - "Metadata scale_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_Y].name(), - 
column::SCALE_Y, - "Metadata scale_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_X].name(), - column::SKEW_X, - "Metadata skew_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_Y].name(), - column::SKEW_Y, - "Metadata skew_y index mismatch" - ); - } else { - panic!("Expected Struct type for metadata"); - } - - // Test band metadata indices - let band_metadata_type = RasterSchema::band_metadata_type(); - if let DataType::Struct(band_metadata_fields) = band_metadata_type { + // Test band indices + let band_type = RasterSchema::band_type(); + if let DataType::Struct(band_fields) = band_type { + assert_eq!(band_fields.len(), 9, "Expected exactly 9 band fields"); + assert_eq!(band_fields[band_indices::NAME].name(), column::NAME); assert_eq!( - band_metadata_fields.len(), - 5, - "Expected exactly 5 band metadata fields" + band_fields[band_indices::DIM_NAMES].name(), + column::DIM_NAMES ); assert_eq!( - band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), - column::NODATAVALUE, - "Band metadata nodatavalue index mismatch" + band_fields[band_indices::SOURCE_SHAPE].name(), + column::SOURCE_SHAPE ); assert_eq!( - band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), - column::STORAGE_TYPE, - "Band metadata storage_type index mismatch" + band_fields[band_indices::DATA_TYPE].name(), + column::DATATYPE ); - assert_eq!( - band_metadata_fields[band_metadata_indices::DATATYPE].name(), - column::DATATYPE, - "Band metadata datatype index mismatch" + assert_eq!(band_fields[band_indices::NODATA].name(), column::NODATA); + assert_eq!(band_fields[band_indices::VIEW].name(), column::VIEW); + assert!( + band_fields[band_indices::VIEW].is_nullable(), + "view field must be nullable — null encodes the identity view" ); assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_URL].name(), - column::OUTDB_URL, - "Band metadata outdb_url index mismatch" + band_fields[band_indices::OUTDB_URI].name(), + 
column::OUTDB_URI ); assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_BAND_ID].name(), - column::OUTDB_BAND_ID, - "Band metadata outdb_band_id index mismatch" + band_fields[band_indices::OUTDB_FORMAT].name(), + column::OUTDB_FORMAT ); + assert_eq!(band_fields[band_indices::DATA].name(), column::DATA); } else { - panic!("Expected Struct type for band metadata"); + panic!("Expected Struct type for band"); } + } - // Test band indices - let band_type = RasterSchema::band_type(); - if let DataType::Struct(band_fields) = band_type { - assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); - assert_eq!( - band_fields[band_indices::METADATA].name(), - column::METADATA, - "Band metadata index mismatch" - ); - assert_eq!( - band_fields[band_indices::DATA].name(), - column::DATA, - "Band data index mismatch" - ); - } else { - panic!("Expected Struct type for band"); + #[test] + fn test_view_type_struct_shape() { + // The view struct must have exactly 4 Int64 fields in the order + // expected by band_view_indices. 
+ let DataType::List(item_field) = RasterSchema::view_type() else { + panic!("Expected List type for view"); + }; + let DataType::Struct(view_fields) = item_field.data_type() else { + panic!("Expected Struct type inside view list"); + }; + assert_eq!(view_fields.len(), 4); + assert_eq!( + view_fields[band_view_indices::SOURCE_AXIS].name(), + "source_axis" + ); + assert_eq!(view_fields[band_view_indices::START].name(), "start"); + assert_eq!(view_fields[band_view_indices::STEP].name(), "step"); + assert_eq!(view_fields[band_view_indices::STEPS].name(), "steps"); + for f in view_fields.iter() { + assert_eq!(f.data_type(), &DataType::Int64); } } @@ -377,6 +380,48 @@ mod tests { assert_eq!(BandDataType::Float64.byte_size(), 8); } + #[test] + fn test_band_data_type_try_from_u32() { + assert_eq!(BandDataType::try_from_u32(1), Some(BandDataType::UInt8)); + assert_eq!(BandDataType::try_from_u32(2), Some(BandDataType::UInt16)); + assert_eq!(BandDataType::try_from_u32(3), Some(BandDataType::Int16)); + assert_eq!(BandDataType::try_from_u32(4), Some(BandDataType::UInt32)); + assert_eq!(BandDataType::try_from_u32(5), Some(BandDataType::Int32)); + assert_eq!(BandDataType::try_from_u32(6), Some(BandDataType::Float32)); + assert_eq!(BandDataType::try_from_u32(7), Some(BandDataType::Float64)); + assert_eq!(BandDataType::try_from_u32(8), Some(BandDataType::UInt64)); + assert_eq!(BandDataType::try_from_u32(9), Some(BandDataType::Int64)); + assert_eq!(BandDataType::try_from_u32(10), Some(BandDataType::Int8)); + assert_eq!(BandDataType::try_from_u32(0), None); + assert_eq!(BandDataType::try_from_u32(11), None); + assert_eq!(BandDataType::try_from_u32(u32::MAX), None); + } + + #[test] + fn test_band_data_type_roundtrip_u32() { + // Verify that discriminant → try_from_u32 round-trips for all variants + let all_types = [ + BandDataType::UInt8, + BandDataType::UInt16, + BandDataType::Int16, + BandDataType::UInt32, + BandDataType::Int32, + BandDataType::Float32, + BandDataType::Float64, + 
BandDataType::UInt64, + BandDataType::Int64, + BandDataType::Int8, + ]; + for dt in all_types { + let value = dt as u32; + assert_eq!( + BandDataType::try_from_u32(value), + Some(dt), + "Round-trip failed for {dt:?} (discriminant {value})" + ); + } + } + #[test] fn test_band_data_type_pixel_type_name() { assert_eq!(BandDataType::UInt8.pixel_type_name(), "UNSIGNED_8BITS"); From d69069a3779594316041761095b4bed6f107864f Mon Sep 17 00:00:00 2001 From: jameswillis Date: Mon, 4 May 2026 16:08:35 -0700 Subject: [PATCH 2/6] feat(sedona-raster): N-D trait surface and BandRef::is_2d MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RasterRef and BandRef accessors over the canonical N-D schema: spatial_dims/spatial_shape, transform, crs, num_bands, band(i), and band-level dim_names, source_shape, shape (visible, derived from view), view, data_type, nodata, outdb_uri, outdb_format, nd_buffer, contiguous_data returning Cow<[u8]>. validate_view enforces all view rules including i64-overflow on start + (steps-1)*step. NdBuffer exposes raw buffer + shape + byte strides + offset for zero-copy access (numpy / Arrow C Data Interface boundary); VIEW → byte strides happens inside nd_buffer(). Adds BandRef::is_2d() default method as the gate GDAL-backed paths use to refuse N-D input cleanly: true iff dim_names == ["y","x"] over the identity view. --- rust/sedona-raster/src/traits.rs | 507 +++++++++++++++++++++++++------ 1 file changed, 409 insertions(+), 98 deletions(-) diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs index f8541ff33..82d5dc806 100644 --- a/rust/sedona-raster/src/traits.rs +++ b/rust/sedona-raster/src/traits.rs @@ -15,112 +15,297 @@ // specific language governing permissions and limitations // under the License. 
+use std::borrow::Cow; + use arrow_schema::ArrowError; +use sedona_schema::raster::BandDataType; -use sedona_schema::raster::{BandDataType, StorageType}; - -/// Metadata for a raster -#[derive(Debug, Clone)] -pub struct RasterMetadata { - pub width: u64, - pub height: u64, - pub upperleft_x: f64, - pub upperleft_y: f64, - pub scale_x: f64, - pub scale_y: f64, - pub skew_x: f64, - pub skew_y: f64, +/// Zero-copy view into a band's N-D data buffer with layout metadata. +/// +/// `shape`, `strides`, and `offset` describe the *visible* region in +/// byte-stride terms — they are computed by composing the band's +/// `source_shape` (the natural extent of `buffer`) with its `view` +/// (the per-axis `(source_axis, start, step, steps)` slice spec). Strides +/// can be zero (broadcast) or negative (reverse iteration), and the layout is +/// not necessarily C-order. Consumers that need a flat row-major buffer should use +/// `BandRef::contiguous_data()` instead. +#[derive(Debug)] +pub struct NdBuffer<'a> { + pub buffer: &'a [u8], + pub shape: &'a [u64], + pub strides: &'a [i64], + pub offset: u64, + pub data_type: BandDataType, } -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, - /// URL for OutDb reference (only used when storage_type == OutDbRef) - pub outdb_url: Option, - /// Band ID within the OutDb resource (only used when storage_type == OutDbRef) - pub outdb_band_id: Option, +/// One per-dimension entry of a band's logical view. Describes how a +/// visible axis maps onto an axis of the underlying source buffer. +/// +/// - `source_axis`: index into the band's `source_shape` that this visible +/// axis reads from. Across a band's full view, `source_axis` values must +/// form a permutation of `0..ndim` — axis-dropping and axis-introducing +/// views are not supported today. +/// - `start`: starting index along the source axis (in elements, not bytes).
+/// - `step`: stride between consecutive visible elements along the source +/// axis. `step == 0` means broadcast (the same source element is +/// exposed `steps` times); negative `step` means reverse iteration. +/// - `steps`: number of visible elements along this axis. `steps == 0` is +/// allowed (empty axis). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ViewEntry { + pub source_axis: i64, + pub start: i64, + pub step: i64, + pub steps: i64, } -/// Trait for accessing complete raster data +/// Trait for accessing an N-dimensional raster (top level). +/// +/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with +/// a single flat interface. Bands are 0-indexed. pub trait RasterRef { - /// Raster metadata accessor - fn metadata(&self) -> &dyn MetadataRef; - /// CRS accessor + /// Number of bands/variables + fn num_bands(&self) -> usize; + + /// Access a band by 0-based index + fn band(&self, index: usize) -> Option>; + + /// Band name (e.g., Zarr variable name). None for unnamed bands. + fn band_name(&self, index: usize) -> Option<&str>; + + /// Fast path for band data type — reads the scalar `data_type` column + /// without materialising a full `BandRef`. UDFs that only need this + /// metadata field should prefer this over `band(i)?.data_type()`. + /// Returns None if `index` is out of range or the discriminant is invalid. + /// + /// The default implementation delegates to `band(i)`. Backends with a + /// flat columnar layout should override for the no-allocation fast path. + fn band_data_type(&self, index: usize) -> Option { + self.band(index).map(|b| b.data_type()) + } + + /// Fast path for band outdb URI — reads the `outdb_uri` column without + /// materialising a `BandRef`. Returns None if the band has no URI or + /// if `index` is out of range. + /// + /// The default implementation must allocate a `Box`; the + /// raster-array backend overrides it to read the column directly. 
+ /// Default returns None because the borrow can't outlive the boxed band. + fn band_outdb_uri(&self, index: usize) -> Option<&str> { + let _ = index; + None + } + + /// Fast path for band outdb format — reads the `outdb_format` column + /// without materialising a `BandRef`. Default returns None for the + /// same lifetime reason as `band_outdb_uri`. + fn band_outdb_format(&self, index: usize) -> Option<&str> { + let _ = index; + None + } + + /// Fast path for band nodata bytes — reads the `nodata` column without + /// materialising a `BandRef`. Default returns None for the same + /// lifetime reason as `band_outdb_uri`. + fn band_nodata(&self, index: usize) -> Option<&[u8]> { + let _ = index; + None + } + + /// CRS string (PROJJSON, WKT, or authority code). None if not set. fn crs(&self) -> Option<&str>; - /// Bands accessor - fn bands(&self) -> &dyn BandsRef; -} -/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) -pub trait MetadataRef { - /// Width of the raster in pixels - fn width(&self) -> u64; - /// Height of the raster in pixels - fn height(&self) -> u64; - /// X coordinate of the upper-left corner - fn upper_left_x(&self) -> f64; - /// Y coordinate of the upper-left corner - fn upper_left_y(&self) -> f64; - /// X-direction pixel size (scale) - fn scale_x(&self) -> f64; - /// Y-direction pixel size (scale) - fn scale_y(&self) -> f64; - /// X-direction skew/rotation - fn skew_x(&self) -> f64; - /// Y-direction skew/rotation - fn skew_y(&self) -> f64; -} -/// Trait for accessing all bands in a raster -pub trait BandsRef { - /// Number of bands in the raster - fn len(&self) -> usize; - /// Check if no bands are present - fn is_empty(&self) -> bool { - self.len() == 0 - } - /// Get a specific band by number (returns Error if out of bounds) - /// By convention, band numbers are 1-based - fn band(&self, number: usize) -> Result, ArrowError>; - /// Iterator over all bands - fn iter(&self) -> Box + '_>; + /// 6-element affine 
transform in GDAL GeoTransform order: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + fn transform(&self) -> &[f64]; + + /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase + /// would extend to `["x","y","z"]`). Every band must contain each of these + /// names in its own `dim_names`, with matching sizes. + fn spatial_dims(&self) -> Vec<&str>; + + /// Spatial dimension sizes, in the same order as `spatial_dims`. Today + /// `[width, height]`. + fn spatial_shape(&self) -> &[i64]; + + /// Name of the X spatial dimension (e.g., "x", "lon", "easting"). + fn x_dim(&self) -> &str { + let dims = self.spatial_dims(); + dims.into_iter().next().unwrap_or("x") + } + + /// Name of the Y spatial dimension (e.g., "y", "lat", "northing"). + fn y_dim(&self) -> &str { + let dims = self.spatial_dims(); + dims.into_iter().nth(1).unwrap_or("y") + } + + /// Width in pixels — size of the X spatial dimension from the top-level + /// `spatial_shape`. + fn width(&self) -> Option { + self.spatial_shape().first().map(|&v| v as u64) + } + + /// Height in pixels — size of the Y spatial dimension from the top-level + /// `spatial_shape`. + fn height(&self) -> Option { + self.spatial_shape().get(1).map(|&v| v as u64) + } + + /// Look up a band by name. Returns None if no band has that name. + fn band_by_name(&self, name: &str) -> Option> { + (0..self.num_bands()) + .find(|&i| self.band_name(i) == Some(name)) + .and_then(|i| self.band(i)) + } } -/// Trait for accessing individual band data +/// Trait for accessing a single band/variable within an N-D raster. +/// +/// This is the consumer interface. Implementations handle storage details. +/// Two data access paths: +/// - `contiguous_data()` — flat row-major bytes for consumers that don't need +/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware +/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies. pub trait BandRef { - /// Band metadata accessor - fn metadata(&self) -> &dyn BandMetadataRef; - /// Raw band data as bytes (zero-copy access) - fn data(&self) -> &[u8]; -} + // -- Dimension metadata -- -/// Trait for accessing individual band metadata -pub trait BandMetadataRef { - /// No-data value as raw bytes (None if null) - fn nodata_value(&self) -> Option<&[u8]>; - /// Storage type (InDb, OutDbRef, etc) - fn storage_type(&self) -> Result; - /// Band data type (UInt8, Float32, etc.) - fn data_type(&self) -> Result; - /// OutDb URL (only used when storage_type == OutDbRef) - fn outdb_url(&self) -> Option<&str>; - /// OutDb band ID (only used when storage_type == OutDbRef) - fn outdb_band_id(&self) -> Option; - - /// No-data value interpreted as f64. + /// Number of dimensions in this band + fn ndim(&self) -> usize; + + /// Dimension names in order (e.g., `["time", "y", "x"]`) + fn dim_names(&self) -> Vec<&str>; + + /// Visible shape — size of each dimension in the band's view, in + /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`. + /// This is what almost all consumers want; use `raw_source_shape()` only + /// when you need to address into the raw `data` buffer (e.g. FFI). + fn shape(&self) -> &[u64]; + + /// **Internal/FFI-only.** Natural C-order extent of the band's + /// underlying `data` buffer, indexed by *source* axis (not visible + /// axis). Almost every consumer wants `shape()` instead — that is the + /// region the band exposes, and is what you compare against + /// `spatial_shape`, iterate over for pixels, and compose further views + /// against. The two only agree when the band's view is the identity; + /// any slice, broadcast, or permutation makes them diverge. + /// + /// Use this only when you need to index directly into the raw `data` + /// bytes (e.g. 
Arrow C Data Interface, numpy zero-copy views) and you + /// also handle `view()` and the byte-stride layout from `nd_buffer()`. + fn raw_source_shape(&self) -> &[u64]; + + /// Per-visible-dimension view entries describing how the band's + /// visible axes map onto its `source_shape`. `view().len() == ndim()`. + /// See `ViewEntry` for per-entry semantics. + fn view(&self) -> &[ViewEntry]; + + /// Size of a named dimension (None if doesn't exist) + fn dim_size(&self, name: &str) -> Option { + let idx = self.dim_index(name)?; + Some(self.shape()[idx]) + } + + /// Index of a named dimension (None if doesn't exist) + fn dim_index(&self, name: &str) -> Option { + self.dim_names().iter().position(|n| *n == name) + } + + /// True iff this band is shaped exactly like a legacy 2-D raster band: + /// `dim_names == ["y", "x"]` and the view is the identity over the + /// band's `raw_source_shape` (no slice, no broadcast, no permutation). + /// + /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while + /// they wait for an MDArray-aware port. + fn is_2d(&self) -> bool { + let dims = self.dim_names(); + if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" { + return false; + } + let view = self.view(); + let source_shape = self.raw_source_shape(); + if view.len() != 2 || source_shape.len() != 2 { + return false; + } + view.iter().enumerate().all(|(i, v)| { + v.source_axis as usize == i + && v.start == 0 + && v.step == 1 + && v.steps >= 0 + && v.steps as u64 == source_shape[i] + }) + } + + // -- Band metadata -- + + /// Data type for all elements in this band + fn data_type(&self) -> BandDataType; + + /// Nodata value as raw bytes (None if not set) + fn nodata(&self) -> Option<&[u8]>; + + /// OutDb URI — location of the external resource (e.g. + /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for + /// in-memory bands. Scheme resolution is delegated to an + /// `ObjectStoreRegistry`; it does *not* imply a format. 
+ fn outdb_uri(&self) -> Option<&str> { + None + } + + /// OutDb format — how to interpret the bytes at `outdb_uri` + /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's + /// `contiguous_data()` / `nd_buffer()` is authoritative. + fn outdb_format(&self) -> Option<&str> { + None + } + + /// True if this band's bytes live in the `data` buffer (in-database). + /// False if the bytes must be fetched from `outdb_uri` (out-of-database). + /// + /// The discriminator is whether the `data` buffer is non-empty — + /// `outdb_uri` and `outdb_format` are orthogonal location/format hints + /// that may be set on either kind of band. + fn is_indb(&self) -> bool { + // Default: materialize via nd_buffer and check buffer emptiness. + // Concrete impls should override with a direct buffer check. + self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty()) + } + + // -- Data access -- + + /// Raw backing buffer + visible-region layout. Triggers load for lazy + /// impls. The returned `NdBuffer` describes the band's view in + /// byte-stride terms — `shape` is the visible shape, `strides` and + /// `offset` are computed by composing the view with the source's + /// natural C-order byte strides. Strides may be zero (broadcast) or + /// negative (reverse iteration). + fn nd_buffer(&self) -> Result, ArrowError>; + + /// Contiguous row-major bytes covering the *visible* region. Zero-copy + /// (`Cow::Borrowed`) when the view is full identity over a C-order + /// source buffer; copies into a new buffer when the view slices, + /// broadcasts, or permutes. Most RS_* functions use this. + fn contiguous_data(&self) -> Result, ArrowError>; + + /// Nodata value interpreted as f64. /// /// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on - /// success, or an error when the raw bytes have an unexpected length for - /// the band's data type. 
- fn nodata_value_as_f64(&self) -> Result, ArrowError> { - let bytes = match self.nodata_value() { + /// success, or an error when the raw bytes have an unexpected length. + /// + /// # Warning + /// + /// For 64-bit integer bands (`Int64`, `UInt64`), the conversion to `f64` + /// is lossy when the magnitude exceeds 2^53 — values outside + /// `[-9_007_199_254_740_992, 9_007_199_254_740_992]` will be rounded to + /// the nearest representable double. Use `nodata()` directly to recover + /// the exact bytes if you need full integer precision. + fn nodata_as_f64(&self) -> Result, ArrowError> { + let bytes = match self.nodata() { Some(b) => b, None => return Ok(None), }; - let dt = self.data_type()?; - nodata_bytes_to_f64(bytes, &dt).map(Some) + nodata_bytes_to_f64(bytes, &self.data_type()).map(Some) } } @@ -128,7 +313,7 @@ pub trait BandMetadataRef { /// /// The bytes are expected to be in little-endian order and exactly match the /// byte size of the data type. -fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result { +pub fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result { macro_rules! read_le { ($t:ty, $n:expr) => {{ let arr: [u8; $n] = bytes.try_into().map_err(|_| { @@ -173,15 +358,6 @@ fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result: Iterator> { - fn len(&self) -> usize; - /// Check if there are no more bands - fn is_empty(&self) -> bool { - self.len() == 0 - } -} - #[cfg(test)] mod tests { use super::*; @@ -217,4 +393,139 @@ mod tests { let result = nodata_bytes_to_f64(&[1, 2, 3], &BandDataType::Float64); assert!(result.is_err()); } + + #[test] + fn test_nodata_as_f64_int64_loses_precision_above_2_pow_53() { + // Locks in the documented warning: nodata bytes for Int64 values + // beyond f64's 53-bit mantissa silently round on conversion. + // The expected f64 is hard-coded — deriving it via `as f64` would + // mean the test invokes the same primitive cast it claims to test. 
+ let big = (1i64 << 53) + 1; // 2^53 + 1; not representable in f64 + let bytes = big.to_le_bytes(); + let val = nodata_bytes_to_f64(&bytes, &BandDataType::Int64).unwrap(); + assert_eq!(val, 9007199254740992.0_f64); + assert_ne!(val as i64, big); + } + + fn ve(source_axis: i64, start: i64, step: i64, steps: i64) -> ViewEntry { + ViewEntry { + source_axis, + start, + step, + steps, + } + } + + /// Minimal `BandRef` stub: only the inputs `is_2d` actually inspects + /// (`dim_names`, `view`, `raw_source_shape`) carry meaningful values; + /// every other method returns a placeholder we never read. + struct StubBand { + dim_names: Vec, + source_shape: Vec, + shape: Vec, + view: Vec, + } + + impl BandRef for StubBand { + fn ndim(&self) -> usize { + self.dim_names.len() + } + fn dim_names(&self) -> Vec<&str> { + self.dim_names.iter().map(String::as_str).collect() + } + fn shape(&self) -> &[u64] { + &self.shape + } + fn raw_source_shape(&self) -> &[u64] { + &self.source_shape + } + fn view(&self) -> &[ViewEntry] { + &self.view + } + fn data_type(&self) -> BandDataType { + BandDataType::UInt8 + } + fn nodata(&self) -> Option<&[u8]> { + None + } + fn nd_buffer(&self) -> Result, ArrowError> { + unimplemented!("not used in is_2d tests") + } + fn contiguous_data(&self) -> Result, ArrowError> { + unimplemented!("not used in is_2d tests") + } + } + + fn band(dims: &[&str], source_shape: &[u64], view: &[ViewEntry]) -> StubBand { + let shape = view.iter().map(|v| v.steps as u64).collect(); + StubBand { + dim_names: dims.iter().map(|s| (*s).to_string()).collect(), + source_shape: source_shape.to_vec(), + shape, + view: view.to_vec(), + } + } + + #[test] + fn is_2d_identity_yx_is_true() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert!(b.is_2d()); + } + + #[test] + fn is_2d_identity_3d_is_false() { + let b = band( + &["time", "y", "x"], + &[3, 4, 5], + &[ve(0, 0, 1, 3), ve(1, 0, 1, 4), ve(2, 0, 1, 5)], + ); + assert!(!b.is_2d()); + } + + #[test] + 
fn is_2d_identity_1d_is_false() { + let b = band(&["x"], &[5], &[ve(0, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_slice_view_is_false() { + // Same dim_names but the y-axis is sliced — view is not the identity. + let b = band(&["y", "x"], &[4, 5], &[ve(0, 1, 1, 2), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_step_two_is_false() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 2, 2), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_broadcast_is_false() { + let b = band(&["y", "x"], &[4, 5], &[ve(0, 0, 0, 4), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_permuted_xy_is_false() { + // dim_names are swapped — not the legacy 2D shape, even though the + // view per-axis is the identity. + let b = band(&["x", "y"], &[5, 4], &[ve(0, 0, 1, 5), ve(1, 0, 1, 4)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_with_transposed_source_axes_is_false() { + // dim_names are ["y","x"] but the view permutes the source axes, + // so the band exposes y-then-x out of an x-then-y source. + let b = band(&["y", "x"], &[5, 4], &[ve(1, 0, 1, 4), ve(0, 0, 1, 5)]); + assert!(!b.is_2d()); + } + + #[test] + fn is_2d_yx_other_dim_names_is_false() { + let b = band(&["lat", "lon"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert!(!b.is_2d()); + } } From 5bebc6ea2edfed61a1b5ca70279a943622163619 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Mon, 4 May 2026 16:08:47 -0700 Subject: [PATCH 3/6] refactor(sedona-raster, sedona-raster-functions, sedona-testing): N-D reader/builder + RS_* migration View-aware Arrow reader (RasterStructArray, BandRefImpl) with corruption- surgery (negative steps, bad source_axis, length mismatch) that round-trips an ArrowError. Builder exposes start_raster / start_band for full N-D plus start_raster_2d / start_band_2d for legacy 2D, with identity-view default written as a null view row. 
finish_raster validates each band's visible shape against the raster's spatial_shape along the spatial dims. All 33 RS_* functions migrated mechanically; outputs on 2D inputs are byte-identical to #787. RS_BandPath keeps its existing inline fragment-stripping (format-agnostic display, untouched by the GDAL parser). Test helpers in sedona-testing rewritten on the N-D builder API. --- Cargo.lock | 1 + rust/sedona-raster-functions/src/executor.rs | 39 +- .../src/rs_band_accessors.rs | 69 +- .../src/rs_bandpath.rs | 104 +- .../src/rs_convexhull.rs | 14 +- .../src/rs_envelope.rs | 14 +- .../sedona-raster-functions/src/rs_example.rs | 51 +- .../src/rs_georeference.rs | 14 +- .../src/rs_geotransform.rs | 18 +- .../src/rs_numbands.rs | 2 +- .../src/rs_pixel_functions.rs | 2 +- .../sedona-raster-functions/src/rs_setsrid.rs | 30 +- rust/sedona-raster-functions/src/rs_size.rs | 13 +- .../src/rs_spatial_predicates.rs | 37 +- rust/sedona-raster-functions/src/rs_srid.rs | 23 +- rust/sedona-raster/Cargo.toml | 1 + .../src/affine_transformation.rs | 273 ++- rust/sedona-raster/src/array.rs | 1246 ++++++------ rust/sedona-raster/src/builder.rs | 1758 +++++++++++------ rust/sedona-raster/src/display.rs | 21 +- rust/sedona-testing/src/benchmark_util.rs | 5 +- rust/sedona-testing/src/rasters.rs | 456 ++--- 22 files changed, 2279 insertions(+), 1912 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5f233704..2f79cfe9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5701,6 +5701,7 @@ dependencies = [ "approx", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", "sedona-common", "sedona-schema", diff --git a/rust/sedona-raster-functions/src/executor.rs b/rust/sedona-raster-functions/src/executor.rs index 4a4e998f8..774bf282d 100644 --- a/rust/sedona-raster-functions/src/executor.rs +++ b/rust/sedona-raster-functions/src/executor.rs @@ -358,13 +358,16 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { arr0.len() ); } + + // Hoist the RasterStructArray so its lifetime covers the loop. 
+ let scalar_arr1; let r1 = match sv1 { ScalarValue::Struct(arc_struct) => { - let arr1 = RasterStructArray::new(arc_struct.as_ref()); - if arr1.is_null(0) { + scalar_arr1 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr1.is_null(0) { None } else { - Some(arr1.get(0)?) + Some(scalar_arr1.get(0)?) } } ScalarValue::Null => None, @@ -395,13 +398,16 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { arr1.len() ); } + + // Hoist the RasterStructArray so its lifetime covers the loop. + let scalar_arr0; let r0 = match sv0 { ScalarValue::Struct(arc_struct) => { - let arr0 = RasterStructArray::new(arc_struct.as_ref()); - if arr0.is_null(0) { + scalar_arr0 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr0.is_null(0) { None } else { - Some(arr0.get(0)?) + Some(scalar_arr0.get(0)?) } } ScalarValue::Null => None, @@ -421,13 +427,15 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { Ok(()) } (ColumnarValue::Scalar(sv0), ColumnarValue::Scalar(sv1)) => { + // Hoist both RasterStructArrays so their lifetimes cover the loop. + let scalar_arr0; let r0 = match sv0 { ScalarValue::Struct(arc_struct) => { - let arr0 = RasterStructArray::new(arc_struct.as_ref()); - if arr0.is_null(0) { + scalar_arr0 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr0.is_null(0) { None } else { - Some(arr0.get(0)?) + Some(scalar_arr0.get(0)?) } } ScalarValue::Null => None, @@ -435,13 +443,14 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { return sedona_internal_err!("Expected Struct scalar for raster"); } }; + let scalar_arr1; let r1 = match sv1 { ScalarValue::Struct(arc_struct) => { - let arr1 = RasterStructArray::new(arc_struct.as_ref()); - if arr1.is_null(0) { + scalar_arr1 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr1.is_null(0) { None } else { - Some(arr1.get(0)?) + Some(scalar_arr1.get(0)?) 
} } ScalarValue::Null => None, @@ -724,7 +733,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } @@ -766,7 +775,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } @@ -803,7 +812,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } diff --git a/rust/sedona-raster-functions/src/rs_band_accessors.rs b/rust/sedona-raster-functions/src/rs_band_accessors.rs index ee1a308e1..37d141f70 100644 --- a/rust/sedona-raster-functions/src/rs_band_accessors.rs +++ b/rust/sedona-raster-functions/src/rs_band_accessors.rs @@ -25,7 +25,7 @@ use datafusion_common::cast::as_int32_array; use datafusion_common::error::Result; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_raster::traits::RasterRef; +use sedona_raster::traits::{nodata_bytes_to_f64, RasterRef}; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; // =========================================================================== @@ -120,14 +120,15 @@ fn get_pixel_type( Ok(()) } Some(raster) => { - let num_bands = raster.bands().len(); + let num_bands = raster.num_bands(); if band_index < 1 || band_index > num_bands as i32 { builder.append_null(); return Ok(()); } - let band = raster.bands().band(band_index as usize)?; - let dt = band.metadata().data_type()?; - builder.append_value(dt.pixel_type_name()); + match raster.band_data_type((band_index - 1) as usize) { + Some(dt) => builder.append_value(dt.pixel_type_name()), + None => builder.append_null(), + } Ok(()) } } @@ -224,16 +225,19 @@ fn 
get_nodata_value( Ok(()) } Some(raster) => { - let num_bands = raster.bands().len(); + let num_bands = raster.num_bands(); if band_index < 1 || band_index > num_bands as i32 { builder.append_null(); return Ok(()); } - let band = raster.bands().band(band_index as usize)?; - let band_meta = band.metadata(); - match band_meta.nodata_value_as_f64()? { - None => builder.append_null(), - Some(val) => builder.append_value(val), + let idx = (band_index - 1) as usize; + match (raster.band_nodata(idx), raster.band_data_type(idx)) { + (Some(bytes), Some(dt)) => { + let val = nodata_bytes_to_f64(bytes, &dt) + .map_err(datafusion_common::DataFusionError::from)?; + builder.append_value(val); + } + _ => builder.append_null(), } Ok(()) } @@ -246,30 +250,27 @@ mod tests { use arrow_array::{Array, Float64Array, Int32Array, Int64Array, StringArray, StructArray}; use datafusion_expr::ScalarUDF; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::datatypes::RASTER; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::rasters::generate_test_rasters; use sedona_testing::testers::ScalarUdfTester; - /// Build a single-row raster StructArray with custom metadata and band metadata. + /// Build a single-row raster StructArray with custom parameters. 
fn build_custom_raster( - meta: &RasterMetadata, - band_meta: &BandMetadata, + width: u64, + height: u64, + data_type: BandDataType, + nodata: Option<&[u8]>, data: &[u8], crs: Option<&str>, ) -> StructArray { let mut builder = RasterBuilder::new(1); - builder.start_raster(meta, crs).expect("start raster"); builder - .start_band(BandMetadata { - datatype: band_meta.datatype, - nodata_value: band_meta.nodata_value.clone(), - storage_type: band_meta.storage_type, - outdb_url: band_meta.outdb_url.clone(), - outdb_band_id: band_meta.outdb_band_id, - }) + .start_raster_2d(width, height, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, crs) + .expect("start raster"); + builder + .start_band_2d(data_type, nodata) .expect("start band"); builder.band_data_writer().append_value(data); builder.finish_band().expect("finish band"); @@ -401,25 +402,9 @@ mod tests { #[test] fn udf_bandnodatavalue_no_nodata() { // Create a raster without nodata - let meta = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - let band_meta = BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }; let data = vec![1u8, 2, 3, 4]; - let rasters = build_custom_raster(&meta, &band_meta, &data, Some("OGC:CRS84")); + let rasters = + build_custom_raster(2, 2, BandDataType::UInt8, None, &data, Some("OGC:CRS84")); let udf: ScalarUDF = rs_bandnodatavalue_udf().into(); let tester = ScalarUdfTester::new(udf, vec![RASTER]); diff --git a/rust/sedona-raster-functions/src/rs_bandpath.rs b/rust/sedona-raster-functions/src/rs_bandpath.rs index 35cfe9a07..816fe2a43 100644 --- a/rust/sedona-raster-functions/src/rs_bandpath.rs +++ b/rust/sedona-raster-functions/src/rs_bandpath.rs @@ -24,7 +24,6 @@ use datafusion_common::error::Result; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, 
SedonaScalarUDF}; use sedona_raster::traits::RasterRef; -use sedona_schema::raster::StorageType; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; /// RS_BandPath() scalar UDF implementation @@ -124,21 +123,22 @@ fn get_band_path( match raster_opt { None => builder.append_null(), Some(raster) => { - let bands = raster.bands(); - let num_bands = bands.len() as i32; + let num_bands = raster.num_bands() as i32; if band_index < 1 || band_index > num_bands { builder.append_null(); } else { - let band = bands.band(band_index as usize)?; - let band_metadata = band.metadata(); - - if band_metadata.storage_type()? == StorageType::OutDbRef { - match band_metadata.outdb_url() { - Some(url) => builder.append_value(url), - None => builder.append_null(), + match raster.band_outdb_uri((band_index - 1) as usize) { + Some(uri) => { + // Strip the URL fragment — it carries loader-internal + // details (band id, chunk coords) that users calling + // RS_BandPath don't want to see. + let path = match uri.rfind('#') { + Some(hash) => &uri[..hash], + None => uri, + }; + builder.append_value(path); } - } else { - builder.append_null() + None => builder.append_null(), } } } @@ -225,11 +225,11 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0, band 1: OutDbRef -> URL + // Raster 0, band 1: OutDbRef → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); - // Raster 1: null raster -> null + // Raster 1: null raster → null assert!(string_array.is_null(1)); - // Raster 2, band 2: OutDbRef -> URL + // Raster 2, band 2: OutDbRef → URI assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif"); } @@ -257,38 +257,33 @@ mod tests { } /// Build a raster array with out-db bands for testing RS_BandPath. + /// URIs include a `#band=N` fragment that loaders use to pick the right + /// sub-dataset; `RS_BandPath` strips the fragment before returning the + /// path to the user. 
/// Returns a StructArray with 3 rasters: - /// [0] OutDbRef band with URL "s3://bucket/raster_0.tif" + /// [0] OutDbRef band with URI "s3://bucket/raster_0.tif#band=1", format "geotiff" /// [1] null raster - /// [2] Two bands: InDb band 1, OutDbRef band 2 with URL "s3://bucket/raster_2.tif" + /// [2] Two bands: InDb band 1, OutDbRef band 2 with URI "s3://bucket/raster_2.tif#band=3", format "geotiff" fn build_outdb_rasters() -> arrow_array::StructArray { use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; - use sedona_schema::raster::{BandDataType, StorageType}; - - let metadata = RasterMetadata { - width: 4, - height: 4, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + use sedona_schema::raster::BandDataType; let mut builder = RasterBuilder::new(3); // Raster 0: single OutDbRef band - builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://bucket/raster_0.tif".to_string()), - outdb_band_id: Some(1), - }) + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + Some("s3://bucket/raster_0.tif#band=1"), + Some("geotiff"), + ) .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); @@ -298,26 +293,22 @@ mod tests { builder.append_null().unwrap(); // Raster 2: two bands — InDb (band 1) + OutDbRef (band 2) - builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }) + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) .unwrap(); + 
builder.start_band_2d(BandDataType::UInt8, None).unwrap(); builder.band_data_writer().append_value([0u8; 16]); builder.finish_band().unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://bucket/raster_2.tif".to_string()), - outdb_band_id: Some(3), - }) + .start_band( + None, + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + Some("s3://bucket/raster_2.tif#band=3"), + Some("geotiff"), + ) .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); @@ -339,8 +330,7 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0: OutDbRef band 1 → returns URL - assert!(!string_array.is_null(0)); + // Raster 0: OutDbRef band → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); // Raster 1: null raster → null assert!(string_array.is_null(1)); @@ -365,11 +355,11 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0, band 1: OutDbRef → URL + // Raster 0, band 1: OutDbRef → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); // Raster 1: null raster → null assert!(string_array.is_null(1)); - // Raster 2, band 2: OutDbRef → URL + // Raster 2, band 2: OutDbRef → URI assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif"); } diff --git a/rust/sedona-raster-functions/src/rs_convexhull.rs b/rust/sedona-raster-functions/src/rs_convexhull.rs index e124e3e88..f3c56b564 100644 --- a/rust/sedona-raster-functions/src/rs_convexhull.rs +++ b/rust/sedona-raster-functions/src/rs_convexhull.rs @@ -107,8 +107,18 @@ impl SedonaScalarKernel for RsConvexHull { /// of the raster in world coordinates. Due to skew/rotation in the affine /// transformation, each corner must be computed individually. 
fn write_convexhull_wkb(raster: &dyn RasterRef, out: &mut impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; // Compute the four corners in pixel coordinates: // Upper-left (0, 0), Upper-right (width, 0), Lower-right (width, height), Lower-left (0, height) diff --git a/rust/sedona-raster-functions/src/rs_envelope.rs b/rust/sedona-raster-functions/src/rs_envelope.rs index 2177a18ae..67a36bfa7 100644 --- a/rust/sedona-raster-functions/src/rs_envelope.rs +++ b/rust/sedona-raster-functions/src/rs_envelope.rs @@ -105,8 +105,18 @@ impl SedonaScalarKernel for RsEnvelope { /// derives the min/max X and Y to produce an axis-aligned bounding box. /// For skewed/rotated rasters, this differs from the convex hull. 
fn write_envelope_wkb(raster: &dyn RasterRef, out: &mut impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; // Compute the four corners in world coordinates let (ulx, uly) = to_world_coordinate(raster, 0, 0); diff --git a/rust/sedona-raster-functions/src/rs_example.rs b/rust/sedona-raster-functions/src/rs_example.rs index 48e2fd5ce..f83da24d1 100644 --- a/rust/sedona-raster-functions/src/rs_example.rs +++ b/rust/sedona-raster-functions/src/rs_example.rs @@ -21,13 +21,8 @@ use datafusion_common::error::Result; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::BandMetadata; -use sedona_raster::traits::RasterMetadata; use sedona_schema::{ - crs::lnglat, - datatypes::SedonaType, - matchers::ArgMatcher, - raster::{BandDataType, StorageType}, + crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher, raster::BandDataType, }; /// RS_Example() scalar UDF implementation @@ -60,30 +55,15 @@ impl SedonaScalarKernel for RsExample { let executor = RasterExecutor::new(arg_types, args); let mut builder = RasterBuilder::new(1); - let raster_metadata = RasterMetadata { - width: 64, - height: 32, - upperleft_x: 43.08, - upperleft_y: 79.07, - scale_x: 2.0, - scale_y: 2.0, - skew_x: 1.0, - skew_y: 1.0, - }; + let width: u64 = 64; + let height: u64 = 32; let crs = lnglat().unwrap().to_crs_string(); - builder.start_raster(&raster_metadata, Some(&crs))?; + builder.start_raster_2d(width, 
height, 43.08, 79.07, 2.0, 2.0, 1.0, 1.0, Some(&crs))?; let nodata_value = 127u8; for band_id in 1..=3 { - builder.start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![nodata_value]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - })?; - - let mut band_data = - vec![band_id as u8; (raster_metadata.width * raster_metadata.height) as usize]; + builder.start_band_2d(BandDataType::UInt8, Some(&[nodata_value]))?; + + let mut band_data = vec![band_id as u8; (width * height) as usize]; band_data[0] = nodata_value; // set the top corner to nodata builder.band_data_writer().append_value(&band_data); @@ -121,16 +101,13 @@ mod tests { assert_eq!(raster_array.len(), 1); let raster = raster_array.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 64); - assert_eq!(metadata.height(), 32); - - let bands = raster.bands(); - let band = bands.band(1).unwrap(); - let band_metadata = band.metadata(); - assert_eq!(band_metadata.data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band_metadata.nodata_value(), Some(&[127u8][..])); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); + assert_eq!(raster.width().unwrap(), 64); + assert_eq!(raster.height().unwrap(), 32); + + let band = raster.band(0).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.nodata(), Some(&[127u8][..])); + assert!(band.outdb_uri().is_none()); } else { panic!("Expected scalar struct result"); } diff --git a/rust/sedona-raster-functions/src/rs_georeference.rs b/rust/sedona-raster-functions/src/rs_georeference.rs index bf9b7470b..6afb9f120 100644 --- a/rust/sedona-raster-functions/src/rs_georeference.rs +++ b/rust/sedona-raster-functions/src/rs_georeference.rs @@ -158,13 +158,13 @@ fn format_georeference( match raster_opt { None => builder.append_null(), Some(raster) => { - let metadata = raster.metadata(); - let scale_x = metadata.scale_x(); - let scale_y = 
metadata.scale_y(); - let skew_x = metadata.skew_x(); - let skew_y = metadata.skew_y(); - let upper_left_x = metadata.upper_left_x(); - let upper_left_y = metadata.upper_left_y(); + let t = raster.transform(); + let scale_x = t[1]; + let scale_y = t[5]; + let skew_x = t[2]; + let skew_y = t[4]; + let upper_left_x = t[0]; + let upper_left_y = t[3]; let georeference = match format { GeoReferenceFormat::Gdal => { diff --git a/rust/sedona-raster-functions/src/rs_geotransform.rs b/rust/sedona-raster-functions/src/rs_geotransform.rs index 9c5a9ee46..0206b7bcd 100644 --- a/rust/sedona-raster-functions/src/rs_geotransform.rs +++ b/rust/sedona-raster-functions/src/rs_geotransform.rs @@ -162,22 +162,18 @@ impl SedonaScalarKernel for RsGeoTransform { match raster_opt { None => builder.append_null(), Some(raster) => { - let metadata = raster.metadata(); + let t = raster.transform(); match self.param { GeoTransformParam::Rotation => { let rotation = rotation(raster); builder.append_value(rotation); } - GeoTransformParam::ScaleX => builder.append_value(metadata.scale_x()), - GeoTransformParam::ScaleY => builder.append_value(metadata.scale_y()), - GeoTransformParam::SkewX => builder.append_value(metadata.skew_x()), - GeoTransformParam::SkewY => builder.append_value(metadata.skew_y()), - GeoTransformParam::UpperLeftX => { - builder.append_value(metadata.upper_left_x()) - } - GeoTransformParam::UpperLeftY => { - builder.append_value(metadata.upper_left_y()) - } + GeoTransformParam::ScaleX => builder.append_value(t[1]), + GeoTransformParam::ScaleY => builder.append_value(t[5]), + GeoTransformParam::SkewX => builder.append_value(t[2]), + GeoTransformParam::SkewY => builder.append_value(t[4]), + GeoTransformParam::UpperLeftX => builder.append_value(t[0]), + GeoTransformParam::UpperLeftY => builder.append_value(t[3]), } } } diff --git a/rust/sedona-raster-functions/src/rs_numbands.rs b/rust/sedona-raster-functions/src/rs_numbands.rs index f25c4df47..d52002c11 100644 --- 
a/rust/sedona-raster-functions/src/rs_numbands.rs +++ b/rust/sedona-raster-functions/src/rs_numbands.rs @@ -61,7 +61,7 @@ impl SedonaScalarKernel for RsNumBands { match raster_opt { None => builder.append_null(), Some(raster) => { - let num_bands = raster.bands().len() as u32; + let num_bands = raster.num_bands() as u32; builder.append_value(num_bands); } } diff --git a/rust/sedona-raster-functions/src/rs_pixel_functions.rs b/rust/sedona-raster-functions/src/rs_pixel_functions.rs index c6bb048bf..3e880b2f2 100644 --- a/rust/sedona-raster-functions/src/rs_pixel_functions.rs +++ b/rust/sedona-raster-functions/src/rs_pixel_functions.rs @@ -191,7 +191,7 @@ impl SedonaScalarKernel for RsPixelAsCentroid { let grid_x = (col_x - 1) as f64 + 0.5; let grid_y = (row_y - 1) as f64 + 0.5; - let affine = AffineMatrix::from_metadata(raster.metadata()); + let affine = AffineMatrix::from_transform(raster.transform()); let (wx, wy) = affine.transform(grid_x, grid_y); write_wkb_point(&mut builder, (wx, wy)) diff --git a/rust/sedona-raster-functions/src/rs_setsrid.rs b/rust/sedona-raster-functions/src/rs_setsrid.rs index 2ff6134e4..165e8a60e 100644 --- a/rust/sedona-raster-functions/src/rs_setsrid.rs +++ b/rust/sedona-raster-functions/src/rs_setsrid.rs @@ -516,29 +516,21 @@ mod tests { let modified = result_array.get(i).unwrap(); // Metadata preserved - assert_eq!(original.metadata().width(), modified.metadata().width()); - assert_eq!(original.metadata().height(), modified.metadata().height()); - assert_eq!( - original.metadata().upper_left_x(), - modified.metadata().upper_left_x() - ); - assert_eq!( - original.metadata().upper_left_y(), - modified.metadata().upper_left_y() - ); + assert_eq!(original.width().unwrap(), modified.width().unwrap()); + assert_eq!(original.height().unwrap(), modified.height().unwrap()); + assert_eq!(original.transform()[0], modified.transform()[0]); + assert_eq!(original.transform()[3], modified.transform()[3]); // Band data preserved - let orig_bands = 
original.bands(); - let mod_bands = modified.bands(); - assert_eq!(orig_bands.len(), mod_bands.len()); - for band_idx in 0..orig_bands.len() { - let orig_band = orig_bands.band(band_idx + 1).unwrap(); - let mod_band = mod_bands.band(band_idx + 1).unwrap(); - assert_eq!(orig_band.data(), mod_band.data()); + assert_eq!(original.num_bands(), modified.num_bands()); + for band_idx in 0..original.num_bands() { + let orig_band = original.band(band_idx).unwrap(); + let mod_band = modified.band(band_idx).unwrap(); assert_eq!( - orig_band.metadata().data_type().unwrap(), - mod_band.metadata().data_type().unwrap() + orig_band.contiguous_data().unwrap().as_ref(), + mod_band.contiguous_data().unwrap().as_ref() ); + assert_eq!(orig_band.data_type(), mod_band.data_type()); } // CRS changed diff --git a/rust/sedona-raster-functions/src/rs_size.rs b/rust/sedona-raster-functions/src/rs_size.rs index 6616bc56e..a4dab6872 100644 --- a/rust/sedona-raster-functions/src/rs_size.rs +++ b/rust/sedona-raster-functions/src/rs_size.rs @@ -20,6 +20,7 @@ use crate::executor::RasterExecutor; use arrow_array::builder::UInt64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; +use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_raster::traits::RasterRef; @@ -85,11 +86,19 @@ impl SedonaScalarKernel for RsSize { None => builder.append_null(), Some(raster) => match self.size_type { SizeType::Width => { - let width = raster.metadata().width(); + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine width".into(), + )); + }; builder.append_value(width); } SizeType::Height => { - let height = raster.metadata().height(); + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine height".into(), + )); + }; 
builder.append_value(height); } }, diff --git a/rust/sedona-raster-functions/src/rs_spatial_predicates.rs b/rust/sedona-raster-functions/src/rs_spatial_predicates.rs index b0eaa0574..4a1d73a99 100644 --- a/rust/sedona-raster-functions/src/rs_spatial_predicates.rs +++ b/rust/sedona-raster-functions/src/rs_spatial_predicates.rs @@ -377,8 +377,18 @@ const CONVEXHULL_WKB_SIZE: usize = 93; /// Create WKB for a convex hull polygon for the raster fn write_convexhull_wkb(raster: &dyn RasterRef, out: &mut impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no spatial dimensions; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; let (ulx, uly) = to_world_coordinate(raster, 0, 0); let (urx, ury) = to_world_coordinate(raster, width, 0); @@ -401,13 +411,12 @@ mod tests { use datafusion_expr::ScalarUDF; use rstest::rstest; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::crs::deserialize_crs; use sedona_schema::crs::OGC_CRS84_PROJJSON; use sedona_schema::datatypes::Edges; use sedona_schema::datatypes::RASTER; use sedona_schema::datatypes::WKB_GEOMETRY; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::create::create_array as create_geom_array; use sedona_testing::rasters::generate_test_rasters; @@ -435,26 +444,10 @@ mod tests { /// If `crs` is `None`, the raster has no CRS. 
fn build_unit_raster(crs: Option<&str>) -> arrow_array::StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 1.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, crs).unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_raster_2d(1, 1, 0.0, 1.0, 1.0, -1.0, 0.0, 0.0, crs) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); builder.band_data_writer().append_value([0u8]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-raster-functions/src/rs_srid.rs b/rust/sedona-raster-functions/src/rs_srid.rs index a9b472aeb..614efa0b2 100644 --- a/rust/sedona-raster-functions/src/rs_srid.rs +++ b/rust/sedona-raster-functions/src/rs_srid.rs @@ -126,9 +126,8 @@ mod tests { use datafusion_common::ScalarValue; use datafusion_expr::ScalarUDF; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::datatypes::RASTER; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::rasters::generate_test_rasters; use sedona_testing::testers::ScalarUdfTester; @@ -224,26 +223,10 @@ mod tests { } fn append_1x1_raster_with_crs(builder: &mut RasterBuilder, crs: Option<&str>) { - let raster_metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&raster_metadata, crs).unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + 
.start_raster_2d(1, 1, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, crs) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); builder.band_data_writer().append_value([0u8]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml index 7407a0507..b2da151eb 100644 --- a/rust/sedona-raster/Cargo.toml +++ b/rust/sedona-raster/Cargo.toml @@ -40,3 +40,4 @@ sedona-schema = { workspace = true } [dev-dependencies] sedona-testing = { workspace = true } approx = { workspace = true } +arrow-ipc = { workspace = true } diff --git a/rust/sedona-raster/src/affine_transformation.rs b/rust/sedona-raster/src/affine_transformation.rs index ca6441e73..e4f58d455 100644 --- a/rust/sedona-raster/src/affine_transformation.rs +++ b/rust/sedona-raster/src/affine_transformation.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -use crate::traits::{MetadataRef, RasterRef}; +use crate::traits::RasterRef; use arrow_schema::ArrowError; -/// Pre-computed affine transformation coefficients extracted from raster metadata. +/// Pre-computed affine transformation coefficients. /// -/// Constructing this struct pays the cost of reading metadata once (which may involve -/// vtable dispatch for Arrow-backed rasters). Subsequent `transform` / `inv_transform` -/// calls are pure arithmetic with no virtual calls. +/// Constructing this struct pays the cost of reading the transform once. +/// Subsequent `transform` / `inv_transform` calls are pure arithmetic. +/// +/// The 6-element GDAL GeoTransform convention is: +/// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` #[derive(Debug, Clone)] pub struct AffineMatrix { pub offset_x: f64, @@ -34,16 +36,23 @@ pub struct AffineMatrix { } impl AffineMatrix { - /// Build an `AffineMatrix` from any `MetadataRef` implementer. + /// Build an `AffineMatrix` from a 6-element GDAL GeoTransform slice. 
+ /// + /// Index mapping: `[0]=origin_x, [1]=scale_x, [2]=skew_x, [3]=origin_y, [4]=skew_y, [5]=scale_y` #[inline] - pub fn from_metadata(m: &dyn MetadataRef) -> Self { + pub fn from_transform(t: &[f64]) -> Self { + debug_assert!( + t.len() >= 6, + "transform slice must have at least 6 elements, got {}", + t.len() + ); Self { - offset_x: m.upper_left_x(), - offset_y: m.upper_left_y(), - scale_x: m.scale_x(), - scale_y: m.scale_y(), - skew_x: m.skew_x(), - skew_y: m.skew_y(), + offset_x: t[0], + scale_x: t[1], + skew_x: t[2], + offset_y: t[3], + skew_y: t[4], + scale_y: t[5], } } @@ -92,29 +101,28 @@ impl AffineMatrix { } } -/// Computes the rotation angle (in radians) of the raster based on its geotransform metadata. +/// Computes the rotation angle (in radians) of the raster based on its geotransform. #[inline] pub fn rotation(raster: &dyn RasterRef) -> f64 { - let metadata = raster.metadata(); - (-metadata.skew_x()).atan2(metadata.scale_x()) + let t = raster.transform(); + (-t[2]).atan2(t[1]) // skew_x=t[2], scale_x=t[1] } -/// Performs an affine transformation on the provided x and y coordinates based on the geotransform -/// data in the raster. +/// Performs an affine transformation on the provided x and y coordinates based on the geotransform. /// /// # Arguments -/// * `raster` - Reference to the raster containing metadata +/// * `raster` - Reference to the raster containing transform /// * `x` - X coordinate in pixel space (column) /// * `y` - Y coordinate in pixel space (row) #[inline] pub fn to_world_coordinate(raster: &dyn RasterRef, x: i64, y: i64) -> (f64, f64) { - AffineMatrix::from_metadata(raster.metadata()).transform(x as f64, y as f64) + AffineMatrix::from_transform(raster.transform()).transform(x as f64, y as f64) } /// Performs the inverse affine transformation to convert world coordinates back to raster pixel coordinates. 
/// /// # Arguments -/// * `raster` - Reference to the raster containing metadata +/// * `raster` - Reference to the raster containing transform /// * `world_x` - X coordinate in world space /// * `world_y` - Y coordinate in world space #[inline] @@ -124,139 +132,123 @@ pub fn to_raster_coordinate( world_y: f64, ) -> Result<(i64, i64), ArrowError> { let (rx, ry) = - AffineMatrix::from_metadata(raster.metadata()).inv_transform(world_x, world_y)?; + AffineMatrix::from_transform(raster.transform()).inv_transform(world_x, world_y)?; Ok((rx as i64, ry as i64)) } #[cfg(test)] mod tests { use super::*; - use crate::traits::{MetadataRef, RasterMetadata}; use approx::assert_relative_eq; use std::f64::consts::FRAC_1_SQRT_2; use std::f64::consts::PI; + /// Minimal RasterRef implementation for testing affine transforms. struct TestRaster { - metadata: RasterMetadata, + transform: [f64; 6], + } + + /// Construct a TestRaster with named fields for readability. + fn test_raster( + origin_x: f64, + origin_y: f64, + scale_x: f64, + scale_y: f64, + skew_x: f64, + skew_y: f64, + ) -> TestRaster { + TestRaster { + transform: [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y], + } } impl RasterRef for TestRaster { - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata + fn num_bands(&self) -> usize { + 0 + } + fn band(&self, _index: usize) -> Option> { + None + } + fn band_name(&self, _index: usize) -> Option<&str> { + None } fn crs(&self) -> Option<&str> { None } - fn bands(&self) -> &dyn crate::traits::BandsRef { - unimplemented!() + fn transform(&self) -> &[f64] { + &self.transform + } + fn spatial_dims(&self) -> Vec<&str> { + vec!["x", "y"] + } + fn spatial_shape(&self) -> &[i64] { + &[] } } #[test] fn test_rotation() { - // 0 degree rotation -> gt[1.0, 0.0, 0.0, -1.0] - let raster = rotation_raster(1.0, -1.0, 0.0, 0.0); - let rot = rotation(&raster); - assert_eq!(rot, 0.0); - - // pi/2 -> gt[0.0, -1.0, 1.0, 0.0] - let raster = rotation_raster(0.0, 0.0, -1.0, 1.0); - let 
rot = rotation(&raster); - assert_relative_eq!(rot, PI / 2.0, epsilon = 1e-6); // 90 degrees in radians - - // pi/4 -> gt[0.70710678, -0.70710678, 0.70710678, 0.70710678] - let raster = rotation_raster(FRAC_1_SQRT_2, FRAC_1_SQRT_2, -FRAC_1_SQRT_2, FRAC_1_SQRT_2); - let rot = rotation(&raster); - assert_relative_eq!(rot, PI / 4.0, epsilon = 1e-6); // 45 degrees in radians - - // pi/3 -> gt[0.5, -0.866025, 0.866025, 0.5] - let raster = rotation_raster(0.5, 0.5, -0.866025, 0.866025); - let rot = rotation(&raster); - assert_relative_eq!(rot, PI / 3.0, epsilon = 1e-6); // 60 degrees in radians - - // pi -> gt[-1.0, 0.0, 0.0, -1.0] - let raster = rotation_raster(-1.0, -1.0, 0.0, 0.0); - let rot = rotation(&raster); - assert_relative_eq!(rot, -PI, epsilon = 1e-6); // 180 degrees in radians + // 0 degree rotation + let raster = test_raster(0.0, 0.0, 1.0, -1.0, 0.0, 0.0); + assert_eq!(rotation(&raster), 0.0); + + // pi/2 + let raster = test_raster(0.0, 0.0, 0.0, 0.0, -1.0, 1.0); + assert_relative_eq!(rotation(&raster), PI / 2.0, epsilon = 1e-6); + + // pi/4 + let raster = test_raster( + 0.0, + 0.0, + FRAC_1_SQRT_2, + FRAC_1_SQRT_2, + -FRAC_1_SQRT_2, + FRAC_1_SQRT_2, + ); + assert_relative_eq!(rotation(&raster), PI / 4.0, epsilon = 1e-6); + + // pi/3 + let raster = test_raster(0.0, 0.0, 0.5, 0.5, -0.866025, 0.866025); + assert_relative_eq!(rotation(&raster), PI / 3.0, epsilon = 1e-6); + + // pi + let raster = test_raster(0.0, 0.0, -1.0, -1.0, 0.0, 0.0); + assert_relative_eq!(rotation(&raster), -PI, epsilon = 1e-6); } #[test] fn test_to_world_coordinate() { - // Test case with rotation/skew - let raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }, - }; - - let (wx, wy) = to_world_coordinate(&raster, 0, 0); - assert_eq!((wx, wy), (100.0, 200.0)); - - let (wx, wy) = to_world_coordinate(&raster, 5, 10); - assert_eq!((wx, wy), (107.5, 182.5)); 
+ let raster = test_raster(100.0, 200.0, 1.0, -2.0, 0.25, 0.5); - let (wx, wy) = to_world_coordinate(&raster, 9, 19); - assert_eq!((wx, wy), (113.75, 166.5)); - - let (wx, wy) = to_world_coordinate(&raster, 1, 0); - assert_eq!((wx, wy), (101.0, 200.5)); - - let (wx, wy) = to_world_coordinate(&raster, 0, 1); - assert_eq!((wx, wy), (100.25, 198.0)); + assert_eq!(to_world_coordinate(&raster, 0, 0), (100.0, 200.0)); + assert_eq!(to_world_coordinate(&raster, 5, 10), (107.5, 182.5)); + assert_eq!(to_world_coordinate(&raster, 9, 19), (113.75, 166.5)); + assert_eq!(to_world_coordinate(&raster, 1, 0), (101.0, 200.5)); + assert_eq!(to_world_coordinate(&raster, 0, 1), (100.25, 198.0)); } #[test] fn test_to_raster_coordinate() { - // Test case with rotation/skew - let raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }, - }; - // Reverse of the to_world_coordinate tests - let (wx, wy) = to_raster_coordinate(&raster, 100.0, 200.0).unwrap(); - assert_eq!((wx, wy), (0, 0)); - - let (wx, wy) = to_raster_coordinate(&raster, 107.5, 182.5).unwrap(); - assert_eq!((wx, wy), (5, 10)); - - let (wx, wy) = to_raster_coordinate(&raster, 113.75, 166.5).unwrap(); - assert_eq!((wx, wy), (9, 19)); - - let (wx, wy) = to_raster_coordinate(&raster, 101.0, 200.5).unwrap(); - assert_eq!((wx, wy), (1, 0)); - - let (wx, wy) = to_raster_coordinate(&raster, 100.25, 198.0).unwrap(); - assert_eq!((wx, wy), (0, 1)); - - // Check error handling for zero determinant - let bad_raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: 0.0, - skew_x: 0.0, - skew_y: 0.0, - }, - }; + let raster = test_raster(100.0, 200.0, 1.0, -2.0, 0.25, 0.5); + + assert_eq!(to_raster_coordinate(&raster, 100.0, 200.0).unwrap(), (0, 0)); + assert_eq!( + to_raster_coordinate(&raster, 107.5, 
182.5).unwrap(), + (5, 10) + ); + assert_eq!( + to_raster_coordinate(&raster, 113.75, 166.5).unwrap(), + (9, 19) + ); + assert_eq!(to_raster_coordinate(&raster, 101.0, 200.5).unwrap(), (1, 0)); + assert_eq!( + to_raster_coordinate(&raster, 100.25, 198.0).unwrap(), + (0, 1) + ); + + // Zero determinant + let bad_raster = test_raster(100.0, 200.0, 1.0, 0.0, 0.0, 0.0); let result = to_raster_coordinate(&bad_raster, 100.0, 200.0); assert!(result.is_err()); assert!(result @@ -266,21 +258,6 @@ mod tests { .contains("determinant is zero.")); } - fn rotation_raster(scale_x: f64, scale_y: f64, skew_x: f64, skew_y: f64) -> TestRaster { - TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x, - scale_y, - skew_x, - skew_y, - }, - } - } - fn test_affine() -> AffineMatrix { AffineMatrix { offset_x: 100.0, @@ -324,11 +301,6 @@ mod tests { }; let result = a.inv_transform(0.0, 0.0); assert!(result.is_err()); - assert!(result - .err() - .unwrap() - .to_string() - .contains("determinant is zero.")); } #[test] @@ -345,23 +317,14 @@ mod tests { } #[test] - fn test_affine_from_metadata() { - let m = RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }; - let a = AffineMatrix::from_metadata(&m); + fn test_affine_from_transform() { + let t = [100.0, 1.0, 0.25, 200.0, 0.5, -2.0]; + let a = AffineMatrix::from_transform(&t); assert_eq!(a.offset_x, 100.0); - assert_eq!(a.offset_y, 200.0); assert_eq!(a.scale_x, 1.0); - assert_eq!(a.scale_y, -2.0); assert_eq!(a.skew_x, 0.25); + assert_eq!(a.offset_y, 200.0); assert_eq!(a.skew_y, 0.5); + assert_eq!(a.scale_y, -2.0); } } diff --git a/rust/sedona-raster/src/array.rs b/rust/sedona-raster/src/array.rs index 07a4bce04..4b8f6e3f8 100644 --- a/rust/sedona-raster/src/array.rs +++ b/rust/sedona-raster/src/array.rs @@ -15,445 +15,404 @@ // specific language governing permissions and 
limitations // under the License. +use std::borrow::Cow; + use arrow_array::{ - Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray, StringViewArray, - StructArray, UInt32Array, UInt64Array, + Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray, StringArray, + StringViewArray, StructArray, UInt32Array, UInt64Array, }; use arrow_schema::ArrowError; -use crate::traits::{ - BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef, RasterMetadata, RasterRef, -}; -use sedona_schema::raster::{ - band_indices, band_metadata_indices, metadata_indices, raster_indices, BandDataType, - StorageType, -}; +use crate::traits::{BandRef, NdBuffer, RasterRef, ViewEntry}; +use sedona_schema::raster::{band_indices, raster_indices, BandDataType}; -/// Implement MetadataRef for RasterMetadata to allow direct use with builder -impl MetadataRef for RasterMetadata { - fn width(&self) -> u64 { - self.width - } - fn height(&self) -> u64 { - self.height - } - fn upper_left_x(&self) -> f64 { - self.upperleft_x - } - fn upper_left_y(&self) -> f64 { - self.upperleft_y - } - fn scale_x(&self) -> f64 { - self.scale_x - } - fn scale_y(&self) -> f64 { - self.scale_y - } - fn skew_x(&self) -> f64 { - self.skew_x - } - fn skew_y(&self) -> f64 { - self.skew_y - } -} +// --------------------------------------------------------------------------- +// Band implementation (Arrow-backed) +// --------------------------------------------------------------------------- -/// Implementation of MetadataRef for Arrow StructArray -struct MetadataRefImpl<'a> { - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - index: usize, +/// Arrow-backed implementation of BandRef for a single band within a raster. 
+/// +/// Today this handles only the canonical identity view: `view_entries` is +/// synthesised from `source_shape`, `visible_shape == source_shape`, +/// and `byte_strides` are plain C-order strides with `byte_offset = 0`. +struct BandRefImpl<'a> { + dim_names_list: &'a ListArray, + dim_names_values: &'a StringArray, + source_shape_list: &'a ListArray, + source_shape_values: &'a UInt64Array, + nodata_array: &'a BinaryArray, + outdb_uri_array: &'a StringArray, + outdb_format_array: &'a StringViewArray, + data_array: &'a BinaryViewArray, + /// Absolute row index within the flattened bands arrays + band_row: usize, + /// Resolved at construction so accessors don't re-decode the discriminant. + data_type: BandDataType, + /// Per-visible-axis view, length = ndim. Always identity today. + view_entries: Vec, + /// Visible shape, length = ndim. Equals `source_shape` today. + visible_shape: Vec, + /// Byte strides per visible axis. C-order over `source_shape` today. + byte_strides: Vec, + /// Byte offset into `data` of the visible region's `[0,...,0]` element. 
+ byte_offset: u64, } -impl<'a> MetadataRef for MetadataRefImpl<'a> { - #[inline(always)] - fn width(&self) -> u64 { - self.width_array.value(self.index) - } - - #[inline(always)] - fn height(&self) -> u64 { - self.height_array.value(self.index) +impl<'a> BandRef for BandRefImpl<'a> { + fn ndim(&self) -> usize { + self.view_entries.len() } - #[inline(always)] - fn upper_left_x(&self) -> f64 { - self.upper_left_x_array.value(self.index) + fn dim_names(&self) -> Vec<&str> { + let start = self.dim_names_list.value_offsets()[self.band_row] as usize; + let end = self.dim_names_list.value_offsets()[self.band_row + 1] as usize; + (start..end) + .map(|i| self.dim_names_values.value(i)) + .collect() } - #[inline(always)] - fn upper_left_y(&self) -> f64 { - self.upper_left_y_array.value(self.index) + fn shape(&self) -> &[u64] { + &self.visible_shape } - #[inline(always)] - fn scale_x(&self) -> f64 { - self.scale_x_array.value(self.index) + fn raw_source_shape(&self) -> &[u64] { + let start = self.source_shape_list.value_offsets()[self.band_row] as usize; + let end = self.source_shape_list.value_offsets()[self.band_row + 1] as usize; + &self.source_shape_values.values()[start..end] } - #[inline(always)] - fn scale_y(&self) -> f64 { - self.scale_y_array.value(self.index) + fn view(&self) -> &[ViewEntry] { + &self.view_entries } - #[inline(always)] - fn skew_x(&self) -> f64 { - self.skew_x_array.value(self.index) + fn data_type(&self) -> BandDataType { + self.data_type } - #[inline(always)] - fn skew_y(&self) -> f64 { - self.skew_y_array.value(self.index) - } -} - -/// Implementation of BandMetadataRef for Arrow StructArray -struct BandMetadataRefImpl<'a> { - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, - band_index: usize, -} - -impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { - fn nodata_value(&self) -> Option<&[u8]> { - if 
self.nodata_array.is_null(self.band_index) { + fn nodata(&self) -> Option<&[u8]> { + if self.nodata_array.is_null(self.band_row) { None } else { - Some(self.nodata_array.value(self.band_index)) + Some(self.nodata_array.value(self.band_row)) } } - fn storage_type(&self) -> Result { - let value = self.storage_type_array.value(self.band_index); - let storage_type = match value { - 0 => StorageType::InDb, - 1 => StorageType::OutDbRef, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown storage type: {}", - value - ))) - } - }; - Ok(storage_type) - } - - fn data_type(&self) -> Result { - let value = self.datatype_array.value(self.band_index); - let band_data_type = match value { - 1 => BandDataType::UInt8, - 2 => BandDataType::UInt16, - 3 => BandDataType::Int16, - 4 => BandDataType::UInt32, - 5 => BandDataType::Int32, - 6 => BandDataType::Float32, - 7 => BandDataType::Float64, - 8 => BandDataType::UInt64, - 9 => BandDataType::Int64, - 10 => BandDataType::Int8, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown band data type: {}", - self.datatype_array.value(self.band_index) - ))) - } - }; - Ok(band_data_type) - } - - fn outdb_url(&self) -> Option<&str> { - if self.outdb_url_array.is_null(self.band_index) { + fn outdb_uri(&self) -> Option<&str> { + if self.outdb_uri_array.is_null(self.band_row) { None } else { - Some(self.outdb_url_array.value(self.band_index)) + Some(self.outdb_uri_array.value(self.band_row)) } } - fn outdb_band_id(&self) -> Option { - if self.outdb_band_id_array.is_null(self.band_index) { + fn outdb_format(&self) -> Option<&str> { + if self.outdb_format_array.is_null(self.band_row) { None } else { - Some(self.outdb_band_id_array.value(self.band_index)) + Some(self.outdb_format_array.value(self.band_row)) } } -} -/// Implementation of BandRef for accessing individual band data -struct BandRefImpl<'a> { - band_metadata: BandMetadataRefImpl<'a>, - band_data: &'a [u8], -} + fn is_indb(&self) -> bool { + 
!self.data_array.value(self.band_row).is_empty() + } -impl<'a> BandRef for BandRefImpl<'a> { - fn metadata(&self) -> &dyn BandMetadataRef { - &self.band_metadata + fn nd_buffer(&self) -> Result, ArrowError> { + Ok(NdBuffer { + buffer: self.data_array.value(self.band_row), + shape: &self.visible_shape, + strides: &self.byte_strides, + offset: self.byte_offset, + data_type: self.data_type, + }) } - fn data(&self) -> &[u8] { - self.band_data + fn contiguous_data(&self) -> Result, ArrowError> { + // Identity-view only today, so the data buffer is already row-major + // over the visible region. + Ok(Cow::Borrowed(self.data_array.value(self.band_row))) } } -/// Implementation of BandsRef for accessing all bands in a raster -struct BandsRefImpl<'a> { - bands_list: &'a ListArray, +// --------------------------------------------------------------------------- +// Raster implementation (Arrow-backed) +// --------------------------------------------------------------------------- + +/// Arrow-backed implementation of RasterRef for a single raster row. +pub struct RasterRefImpl<'a> { + raster_struct_array: &'a RasterStructArray<'a>, raster_index: usize, - // Direct references to the metadata and data arrays - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, - band_data_array: &'a BinaryViewArray, } -impl<'a> BandsRef for BandsRefImpl<'a> { - fn len(&self) -> usize { - self.bands_list.value_length(self.raster_index) as usize +impl<'a> RasterRefImpl<'a> { + /// Returns the raw CRS string reference with the array's lifetime. 
+ pub fn crs_str_ref(&self) -> Option<&'a str> { + if self + .raster_struct_array + .crs_array + .is_null(self.raster_index) + { + None + } else { + Some(self.raster_struct_array.crs_array.value(self.raster_index)) + } + } +} + +impl<'a> RasterRef for RasterRefImpl<'a> { + fn num_bands(&self) -> usize { + self.raster_struct_array + .bands_list + .value_length(self.raster_index) as usize } - /// Get a specific band by number (1-based index) - fn band(&self, number: usize) -> Result, ArrowError> { - if number == 0 { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid band number {number}: band numbers must be 1-based" - ))); - } - // By convention, band numbers are 1-based. - // Convert to zero-based index. - let index = number - 1; - if index >= self.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Band number {} is out of range: this raster has {} bands", - number, - self.len() - ))); + fn band(&self, index: usize) -> Option> { + if index >= self.num_bands() { + return None; } - - let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; let band_row = start + index; - let band_metadata = BandMetadataRefImpl { - nodata_array: self.nodata_array, - storage_type_array: self.storage_type_array, - datatype_array: self.datatype_array, - outdb_url_array: self.outdb_url_array, - outdb_band_id_array: self.outdb_band_id_array, - band_index: band_row, - }; - - let band_data = self.band_data_array.value(band_row); + let arr = self.raster_struct_array; - Ok(Box::new(BandRefImpl { - band_metadata, - band_data, - })) - } + // Read source shape slice. 
+ let ss_start = arr.band_source_shape_list.value_offsets()[band_row] as usize; + let ss_end = arr.band_source_shape_list.value_offsets()[band_row + 1] as usize; + let source_shape: &[u64] = &arr.band_source_shape_values.values()[ss_start..ss_end]; - fn iter(&self) -> Box + '_> { - Box::new(BandIteratorImpl { - bands: self, - current: 1, // Start at 1 for 1-based band numbering - }) - } -} + // Reject 0-D bands at the read boundary. Schema doesn't forbid them + // outright but every consumer assumes ndim >= 1. + if source_shape.is_empty() { + return None; + } -/// Concrete implementation of BandIterator trait -pub struct BandIteratorImpl<'a> { - bands: &'a dyn BandsRef, - current: usize, -} + // Resolve data type up front; an unknown discriminant is a + // schema-corruption bug, not user data, so failing the band is + // appropriate. + let data_type_value = arr.band_datatype_array.value(band_row); + let data_type = BandDataType::try_from_u32(data_type_value)?; + + // Only the canonical identity view (null view row) is written today. + // A non-null view row would require the view → byte-stride composition + // path that is deferred to a follow-up; reject it here so callers see + // a clean "no band" rather than a panic. 
+ if !arr.band_view_list.is_null(band_row) { + return None; + } + let view_entries: Vec = source_shape + .iter() + .enumerate() + .map(|(i, &s)| ViewEntry { + source_axis: i as i64, + start: 0, + step: 1, + steps: s as i64, + }) + .collect(); -impl<'a> Iterator for BandIteratorImpl<'a> { - type Item = Box; + let visible_shape: Vec = source_shape.to_vec(); - fn next(&mut self) -> Option { - // current is 1-based, compare against len() + 1 - if self.current <= self.bands.len() { - let band = self.bands.band(self.current).ok(); // Convert Result to Option - self.current += 1; - band - } else { - None + let dtype_size = data_type.byte_size() as i64; + // C-order byte strides over the source_shape: + // byte_strides[k] = dtype_size * Π_{j>k} source_shape[j] + let mut byte_strides = vec![0i64; source_shape.len()]; + byte_strides[source_shape.len() - 1] = dtype_size; + for k in (0..source_shape.len() - 1).rev() { + byte_strides[k] = byte_strides[k + 1] * (source_shape[k + 1] as i64); } - } - fn size_hint(&self) -> (usize, Option) { - // current is 1-based, so remaining calculation needs adjustment - let remaining = self.bands.len().saturating_sub(self.current - 1); - (remaining, Some(remaining)) + Some(Box::new(BandRefImpl { + dim_names_list: arr.band_dim_names_list, + dim_names_values: arr.band_dim_names_values, + source_shape_list: arr.band_source_shape_list, + source_shape_values: arr.band_source_shape_values, + nodata_array: arr.band_nodata_array, + outdb_uri_array: arr.band_outdb_uri_array, + outdb_format_array: arr.band_outdb_format_array, + data_array: arr.band_data_array, + band_row, + data_type, + view_entries, + visible_shape, + byte_strides, + byte_offset: 0, + })) } -} -impl<'a> BandIterator<'a> for BandIteratorImpl<'a> { - fn len(&self) -> usize { - // current is 1-based, so remaining calculation needs adjustment - self.bands.len().saturating_sub(self.current - 1) + fn band_data_type(&self, index: usize) -> Option { + if index >= self.num_bands() { + return 
None; + } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + let value = self.raster_struct_array.band_datatype_array.value(band_row); + BandDataType::try_from_u32(value) } -} - -impl ExactSizeIterator for BandIteratorImpl<'_> {} - -/// Implementation of RasterRef for complete raster access -pub struct RasterRefImpl<'a> { - metadata: MetadataRefImpl<'a>, - crs: &'a StringViewArray, - bands: BandsRefImpl<'a>, -} -impl<'a> RasterRefImpl<'a> { - /// Creates a new RasterRefImpl that provides zero-copy access to the raster at the specified index. - /// - /// # Arguments - /// * `raster_struct_array` - The Arrow StructArray containing raster data - /// * `raster_index` - The zero-based index of the raster to access - #[inline(always)] - pub fn new(raster_struct_array: &RasterStructArray<'a>, raster_index: usize) -> Self { - let metadata = MetadataRefImpl { - width_array: raster_struct_array.width_array, - height_array: raster_struct_array.height_array, - upper_left_x_array: raster_struct_array.upper_left_x_array, - upper_left_y_array: raster_struct_array.upper_left_y_array, - scale_x_array: raster_struct_array.scale_x_array, - scale_y_array: raster_struct_array.scale_y_array, - skew_x_array: raster_struct_array.skew_x_array, - skew_y_array: raster_struct_array.skew_y_array, - index: raster_index, - }; - - let bands = BandsRefImpl { - bands_list: raster_struct_array.bands_list, - raster_index, - nodata_array: raster_struct_array.band_nodata_array, - storage_type_array: raster_struct_array.band_storage_type_array, - datatype_array: raster_struct_array.band_datatype_array, - outdb_url_array: raster_struct_array.band_outdb_url_array, - outdb_band_id_array: raster_struct_array.band_outdb_band_id_array, - band_data_array: raster_struct_array.band_data_array, - }; + fn band_outdb_uri(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = 
self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + let arr = self.raster_struct_array.band_outdb_uri_array; + if arr.is_null(band_row) { + None + } else { + Some(arr.value(band_row)) + } + } - Self { - metadata, - crs: raster_struct_array.crs, - bands, + fn band_outdb_format(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + let arr = self.raster_struct_array.band_outdb_format_array; + if arr.is_null(band_row) { + None + } else { + Some(arr.value(band_row)) } } - pub fn crs_str_ref(&self) -> Option<&'a str> { - if self.crs.is_null(self.bands.raster_index) { + fn band_nodata(&self, index: usize) -> Option<&[u8]> { + if index >= self.num_bands() { + return None; + } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + let arr = self.raster_struct_array.band_nodata_array; + if arr.is_null(band_row) { None } else { - Some(self.crs.value(self.bands.raster_index)) + Some(arr.value(band_row)) } } -} -impl<'a> RasterRef for RasterRefImpl<'a> { - #[inline(always)] - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata + fn band_name(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.raster_struct_array.band_name_array.is_null(band_row) { + None + } else { + Some(self.raster_struct_array.band_name_array.value(band_row)) + } } - #[inline(always)] fn crs(&self) -> Option<&str> { self.crs_str_ref() } - #[inline(always)] - fn bands(&self) -> &dyn BandsRef { - &self.bands + fn transform(&self) -> &[f64] { + let start = + self.raster_struct_array.transform_list.value_offsets()[self.raster_index] as 
usize; + let end = + self.raster_struct_array.transform_list.value_offsets()[self.raster_index + 1] as usize; + assert!( + end - start >= 6, + "transform list must have at least 6 elements for raster {}, got {}", + self.raster_index, + end - start + ); + &self.raster_struct_array.transform_values.values()[start..start + 6] + } + + fn spatial_dims(&self) -> Vec<&str> { + let offsets = self.raster_struct_array.spatial_dims_list.value_offsets(); + let start = offsets[self.raster_index] as usize; + let end = offsets[self.raster_index + 1] as usize; + (start..end) + .map(|i| self.raster_struct_array.spatial_dims_values.value(i)) + .collect() + } + + fn spatial_shape(&self) -> &[i64] { + let offsets = self.raster_struct_array.spatial_shape_list.value_offsets(); + let start = offsets[self.raster_index] as usize; + let end = offsets[self.raster_index + 1] as usize; + &self.raster_struct_array.spatial_shape_values.values()[start..end] } } -/// Access rasters from the Arrow StructArray +// --------------------------------------------------------------------------- +// RasterStructArray — efficient columnar access to rasters +// --------------------------------------------------------------------------- + +/// Access rasters from the Arrow StructArray. /// -/// This provides efficient, zero-copy access to raster data stored in Arrow format. +/// Provides efficient, zero-copy access to N-D raster data stored in Arrow format. 
pub struct RasterStructArray<'a> { raster_array: &'a StructArray, - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - crs: &'a StringViewArray, + // Top-level fields + crs_array: &'a StringViewArray, + transform_list: &'a ListArray, + transform_values: &'a Float64Array, + spatial_dims_list: &'a ListArray, + spatial_dims_values: &'a StringViewArray, + spatial_shape_list: &'a ListArray, + spatial_shape_values: &'a Int64Array, bands_list: &'a ListArray, - band_nodata_array: &'a BinaryArray, - band_storage_type_array: &'a UInt32Array, + // Band-level fields (flattened across all bands in all rasters) + band_name_array: &'a StringArray, + band_dim_names_list: &'a ListArray, + band_dim_names_values: &'a StringArray, + band_source_shape_list: &'a ListArray, + band_source_shape_values: &'a UInt64Array, band_datatype_array: &'a UInt32Array, - band_outdb_url_array: &'a StringArray, - band_outdb_band_id_array: &'a UInt32Array, + band_nodata_array: &'a BinaryArray, + band_view_list: &'a ListArray, + band_outdb_uri_array: &'a StringArray, + band_outdb_format_array: &'a StringViewArray, band_data_array: &'a BinaryViewArray, } impl<'a> RasterStructArray<'a> { - /// Create a new RasterStructArray from an existing StructArray + /// Create a new RasterStructArray from an existing StructArray. 
#[inline] pub fn new(raster_array: &'a StructArray) -> Self { - let crs = raster_array + // Top-level fields + let crs_array = raster_array .column(raster_indices::CRS) .as_any() .downcast_ref::() .unwrap(); - - // Extract the metadata arrays for direct access - let metadata_struct = raster_array - .column(raster_indices::METADATA) - .as_any() - .downcast_ref::() - .unwrap(); - let width_array = metadata_struct - .column(metadata_indices::WIDTH) + let transform_list = raster_array + .column(raster_indices::TRANSFORM) .as_any() - .downcast_ref::() - .unwrap(); - let height_array = metadata_struct - .column(metadata_indices::HEIGHT) - .as_any() - .downcast_ref::() - .unwrap(); - let upper_left_x_array = metadata_struct - .column(metadata_indices::UPPERLEFT_X) - .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let upper_left_y_array = metadata_struct - .column(metadata_indices::UPPERLEFT_Y) + let transform_values = transform_list + .values() .as_any() .downcast_ref::() .unwrap(); - let scale_x_array = metadata_struct - .column(metadata_indices::SCALE_X) + let spatial_dims_list = raster_array + .column(raster_indices::SPATIAL_DIMS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let scale_y_array = metadata_struct - .column(metadata_indices::SCALE_Y) + let spatial_dims_values = spatial_dims_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_x_array = metadata_struct - .column(metadata_indices::SKEW_X) + let spatial_shape_list = raster_array + .column(raster_indices::SPATIAL_SHAPE) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_y_array = metadata_struct - .column(metadata_indices::SKEW_Y) + let spatial_shape_values = spatial_shape_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - // Extract the band arrays for direct access + // Bands list and nested struct let bands_list = raster_array .column(raster_indices::BANDS) .as_any() @@ -464,35 +423,57 @@ 
impl<'a> RasterStructArray<'a> { .as_any() .downcast_ref::() .unwrap(); - let band_metadata_struct = bands_struct - .column(band_indices::METADATA) + + // Band-level fields + let band_name_array = bands_struct + .column(band_indices::NAME) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_nodata_array = band_metadata_struct - .column(band_metadata_indices::NODATAVALUE) + let band_dim_names_list = bands_struct + .column(band_indices::DIM_NAMES) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_storage_type_array = band_metadata_struct - .column(band_metadata_indices::STORAGE_TYPE) + let band_dim_names_values = band_dim_names_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_datatype_array = band_metadata_struct - .column(band_metadata_indices::DATATYPE) + let band_source_shape_list = bands_struct + .column(band_indices::SOURCE_SHAPE) + .as_any() + .downcast_ref::() + .unwrap(); + let band_source_shape_values = band_source_shape_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let band_datatype_array = bands_struct + .column(band_indices::DATA_TYPE) .as_any() .downcast_ref::() .unwrap(); - let band_outdb_url_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_URL) + let band_nodata_array = bands_struct + .column(band_indices::NODATA) + .as_any() + .downcast_ref::() + .unwrap(); + let band_view_list = bands_struct + .column(band_indices::VIEW) + .as_any() + .downcast_ref::() + .unwrap(); + let band_outdb_uri_array = bands_struct + .column(band_indices::OUTDB_URI) .as_any() .downcast_ref::() .unwrap(); - let band_outdb_band_id_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_BAND_ID) + let band_outdb_format_array = bands_struct + .column(band_indices::OUTDB_FORMAT) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let band_data_array = bands_struct .column(band_indices::DATA) @@ -502,49 +483,55 @@ impl<'a> RasterStructArray<'a> 
{ Self { raster_array, - width_array, - height_array, - upper_left_x_array, - upper_left_y_array, - scale_x_array, - scale_y_array, - skew_x_array, - skew_y_array, - crs, + crs_array, + transform_list, + transform_values, + spatial_dims_list, + spatial_dims_values, + spatial_shape_list, + spatial_shape_values, bands_list, - band_nodata_array, - band_storage_type_array, + band_name_array, + band_dim_names_list, + band_dim_names_values, + band_source_shape_list, + band_source_shape_values, band_datatype_array, - band_outdb_url_array, - band_outdb_band_id_array, + band_nodata_array, + band_view_list, + band_outdb_uri_array, + band_outdb_format_array, band_data_array, } } - /// Get the total number of rasters in the array + /// Get the total number of rasters in the array. #[inline(always)] pub fn len(&self) -> usize { self.raster_array.len() } - /// Check if the array is empty + /// Check if the array is empty. #[inline(always)] pub fn is_empty(&self) -> bool { self.raster_array.is_empty() } - /// Get a specific raster by index without consuming the iterator + /// Get a specific raster by index. #[inline(always)] - pub fn get(&self, index: usize) -> Result, ArrowError> { + pub fn get(&'a self, index: usize) -> Result, ArrowError> { if index >= self.raster_array.len() { return Err(ArrowError::InvalidArgumentError(format!( "Invalid raster index: {index}" ))); } - - Ok(RasterRefImpl::new(self, index)) + Ok(RasterRefImpl { + raster_struct_array: self, + raster_index: index, + }) } + /// Check if a raster at the given index is null. 
#[inline(always)] pub fn is_null(&self, index: usize) -> bool { self.raster_array.is_null(index) @@ -555,275 +542,322 @@ impl<'a> RasterStructArray<'a> { mod tests { use super::*; use crate::builder::RasterBuilder; - use crate::traits::{BandMetadata, RasterMetadata}; - use arrow_schema::DataType; - use sedona_schema::raster::{BandDataType, StorageType}; - use sedona_testing::rasters::generate_test_rasters; + use arrow_array::{ArrayRef, ListArray, StructArray, UInt32Array, UInt64Array}; + use arrow_buffer::{OffsetBuffer, ScalarBuffer}; + use arrow_schema::{DataType, Fields}; + use sedona_schema::raster::{band_indices, raster_indices, BandDataType, RasterSchema}; + use std::sync::Arc; + + /// Build a single-raster, single-band raster StructArray with the + /// canonical identity view. Used as the baseline input to the surgery + /// helpers below; callers replace one band-level column to simulate + /// schema corruption on non-view fields. + fn build_identity_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8, 1, 2]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Replace a single column of the bands struct, then rebuild the bands + /// list and the top-level raster struct. Schema-shape preserving — this + /// only swaps the array data, never the field type. 
+ fn replace_band_column( + array: &StructArray, + column_index: usize, + new_column: ArrayRef, + ) -> StructArray { + let bands_list = array + .column(raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_struct = bands_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); - #[test] - fn test_array_basic_functionality() { - // Create a simple raster for testing using the correct API - let mut builder = RasterBuilder::new(10); // capacity - - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, + let mut columns: Vec = bands_struct.columns().to_vec(); + columns[column_index] = new_column; + let DataType::Struct(band_fields) = RasterSchema::band_type() else { + unreachable!("band_type must be Struct") }; + let new_bands_struct = + StructArray::new(band_fields, columns, bands_struct.nulls().cloned()); - let epsg4326 = "EPSG:4326"; - - builder.start_raster(&metadata, Some(epsg4326)).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, + let DataType::List(bands_field) = RasterSchema::bands_type() else { + unreachable!("bands_type must be List") }; + let new_bands_list = ListArray::new( + bands_field, + bands_list.offsets().clone(), + Arc::new(new_bands_struct), + bands_list.nulls().cloned(), + ); - // Add a single band with some test data using the correct API - builder.start_band(band_metadata.clone()).unwrap(); - let test_data = vec![1u8; 100]; // 10x10 raster with value 1 - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - // Test the array - let rasters = RasterStructArray::new(&raster_array); - - assert_eq!(rasters.len(), 1); - 
assert!(!rasters.is_empty()); - - let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); - - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 10); - assert_eq!(metadata.scale_x(), 1.0); - assert_eq!(metadata.scale_y(), -1.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 1); - assert!(!bands.is_empty()); - - // Access band with 1-based band_number - let band = bands.band(1).unwrap(); - assert_eq!(band.data().len(), 100); - assert_eq!(band.data()[0], 1u8); - - let band_meta = band.metadata(); - assert_eq!(band_meta.storage_type().unwrap(), StorageType::InDb); - assert_eq!(band_meta.data_type().unwrap(), BandDataType::UInt8); + let mut top_columns: Vec = array.columns().to_vec(); + top_columns[raster_indices::BANDS] = Arc::new(new_bands_list); + let raster_fields = RasterSchema::fields(); + StructArray::new( + Fields::from(raster_fields.to_vec()), + top_columns, + array.nulls().cloned(), + ) + } - let crs = raster.crs().unwrap(); - assert_eq!(crs, epsg4326); + // ---- Critical #2: bad data_type discriminant ---- - // Test array over bands - let band_iter: Vec<_> = bands.iter().collect(); - assert_eq!(band_iter.len(), 1); + #[test] + fn band_and_band_data_type_return_none_for_unknown_discriminant() { + let array = build_identity_raster(); + let bad_dtype: ArrayRef = Arc::new(UInt32Array::from(vec![0xFFu32])); + let mutated = replace_band_column(&array, band_indices::DATA_TYPE, bad_dtype); + let rasters = RasterStructArray::new(&mutated); + let r = rasters.get(0).unwrap(); + assert!(r.band(0).is_none()); + assert!(r.band_data_type(0).is_none()); } + // ---- Critical #3 (reader side): empty source_shape ---- + #[test] - fn test_multi_band_array() { - let mut builder = RasterBuilder::new(3); - - let metadata = RasterMetadata { - width: 5, - height: 5, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, + fn band_returns_none_when_source_shape_is_empty() { + let array = 
build_identity_raster(); + // Replace source_shape with a single empty list row. + let DataType::List(ss_field) = RasterSchema::source_shape_type() else { + unreachable!() }; - - builder.start_raster(&metadata, None).unwrap(); - - // Add three bands using the correct API - for band_idx in 0..3 { - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - builder.start_band(band_metadata).unwrap(); - let test_data = vec![band_idx as u8; 25]; // 5x5 raster - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } - - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - let rasters = RasterStructArray::new(&raster_array); - let raster = rasters.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 3); - - // Test each band has different data - // Use 1-based band numbers - for i in 0..3 { - // Access band with 1-based band_number - let band = bands.band(i + 1).unwrap(); - let expected_value = i as u8; - assert!(band.data().iter().all(|&x| x == expected_value)); - } - - // Test array - let band_values: Vec = bands - .iter() - .enumerate() - .map(|(i, band)| { - assert_eq!(band.data()[0], i as u8); - band.data()[0] - }) - .collect(); - - assert_eq!(band_values, vec![0, 1, 2]); + let empty_source_shape = ListArray::new( + ss_field, + OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 0])), + Arc::new(UInt64Array::from(Vec::::new())), + None, + ); + let mutated = replace_band_column( + &array, + band_indices::SOURCE_SHAPE, + Arc::new(empty_source_shape), + ); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_none()); } - #[test] - fn test_raster_is_null() { - let raster_array = generate_test_rasters(2, Some(1)).unwrap(); - let rasters = RasterStructArray::new(&raster_array); - 
assert_eq!(rasters.len(), 2); - assert!(!rasters.is_null(0)); - assert!(rasters.is_null(1)); - } + // ---- Important #7: direct fast-path tests ---- - /// Test that `data_type()` and `storage_type()` return `Err` for invalid values - /// instead of panicking. #[test] - fn test_invalid_band_metadata_returns_err() { - use arrow_buffer::{OffsetBuffer, ScalarBuffer}; - use sedona_schema::raster::RasterSchema; - use std::sync::Arc; - - // Build a valid single-band raster first + fn raster_ref_fast_paths_return_expected_values() { + // Single 2-band raster: band 0 has explicit values for nodata, + // outdb_uri, outdb_format; band 1 has all-nullable fields null. let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, None).unwrap(); - let band_meta = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - builder.start_band(band_meta).unwrap(); - builder.band_data_writer().append_value([1u8; 4]); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band( + Some("a"), + &["y", "x"], + &[2, 3], + BandDataType::UInt16, + Some(&[0xFFu8, 0xFE]), + Some("s3://bucket/a.tif"), + Some("GTiff"), + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 12]); + builder.finish_band().unwrap(); + builder + .start_band( + Some("b"), + &["y", "x"], + &[2, 3], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 24]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let valid_array = builder.finish().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = 
rasters.get(0).unwrap(); + + // Bounds: out-of-range indices yield None on every fast path. + assert!(r.band_data_type(2).is_none()); + assert!(r.band_outdb_uri(2).is_none()); + assert!(r.band_outdb_format(2).is_none()); + assert!(r.band_nodata(2).is_none()); + + // Band 0 — non-null values. + assert_eq!(r.band_data_type(0), Some(BandDataType::UInt16)); + assert_eq!(r.band_outdb_uri(0), Some("s3://bucket/a.tif")); + assert_eq!(r.band_outdb_format(0), Some("GTiff")); + assert_eq!(r.band_nodata(0), Some(&[0xFFu8, 0xFE][..])); + + // Band 1 — null fields. + assert_eq!(r.band_data_type(1), Some(BandDataType::Float32)); + assert!(r.band_outdb_uri(1).is_none()); + assert!(r.band_outdb_format(1).is_none()); + assert!(r.band_nodata(1).is_none()); + + // Cross-check against the BandRef slow path. + let band0 = r.band(0).unwrap(); + assert_eq!(band0.data_type(), BandDataType::UInt16); + assert_eq!(band0.outdb_uri(), Some("s3://bucket/a.tif")); + assert_eq!(band0.outdb_format(), Some("GTiff")); + assert_eq!(band0.nodata(), Some(&[0xFFu8, 0xFE][..])); + } + + // ---- Important #9: multi-band, multi-raster identity ---- - // Extract original columns from the valid raster - let metadata_col = valid_array.column(raster_indices::METADATA).clone(); - let crs_col = valid_array.column(raster_indices::CRS).clone(); - let bands_list = valid_array - .column(raster_indices::BANDS) - .as_any() - .downcast_ref::() + #[test] + fn multi_raster_identity_views() { + // Two rasters with multiple identity bands each. Exercises the + // `bands_list.value_offsets()` routing for every per-band lookup — + // a naive reader that forgets to add the per-raster offset would + // hand back data from the wrong band. + let mut builder = RasterBuilder::new(2); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + + // Raster 0: three identity bands. 
+ builder + .start_raster(&transform, &["x"], &[3], None) .unwrap(); - let bands_struct = bands_list - .values() - .as_any() - .downcast_ref::() + builder + .start_band(None, &["x"], &[3], BandDataType::UInt8, None, None, None) .unwrap(); - let orig_band_meta_struct = bands_struct - .column(band_indices::METADATA) - .as_any() - .downcast_ref::() + builder.band_data_writer().append_value(vec![10u8, 20, 30]); + builder.finish_band().unwrap(); + builder + .start_band(None, &["x"], &[3], BandDataType::UInt8, None, None, None) .unwrap(); - let band_data_col = bands_struct.column(band_indices::DATA).clone(); + builder.band_data_writer().append_value(vec![40u8, 50, 60]); + builder.finish_band().unwrap(); + builder + .start_band(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![100u8, 101, 102]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Build tampered band metadata with invalid storage_type=99 and datatype=99 - let DataType::Struct(band_metadata_fields) = RasterSchema::band_metadata_type() else { - panic!("Expected struct type for band metadata"); - }; - let tampered_band_metadata = StructArray::new( - band_metadata_fields, - vec![ - orig_band_meta_struct - .column(band_metadata_indices::NODATAVALUE) - .clone(), - Arc::new(UInt32Array::from(vec![99u32])), // invalid storage_type - Arc::new(UInt32Array::from(vec![99u32])), // invalid datatype - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_URL) - .clone(), - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_BAND_ID) - .clone(), - ], - None, - ); + // Raster 1: two identity bands of a different shape. 
+ builder + .start_raster(&transform, &["x"], &[4], None) + .unwrap(); + builder + .start_band(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![42u8, 43, 44, 45]); + builder.finish_band().unwrap(); + builder + .start_band(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![1u8, 2, 3, 4]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Rebuild band struct - let DataType::Struct(band_fields) = RasterSchema::band_type() else { - panic!("Expected struct type for band"); - }; - let tampered_band_struct = StructArray::new( - band_fields, - vec![Arc::new(tampered_band_metadata), band_data_col], - None, - ); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); - // Rebuild bands list - let DataType::List(band_field) = RasterSchema::bands_type() else { - panic!("Expected list type for bands"); - }; - let tampered_bands_list = ListArray::new( - band_field, - OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 1])), - Arc::new(tampered_band_struct), - None, + let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.num_bands(), 3); + assert_eq!(r0.band(0).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(0).unwrap().contiguous_data().unwrap(), + &[10u8, 20, 30] + ); + assert_eq!(r0.band(1).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(1).unwrap().contiguous_data().unwrap(), + &[40u8, 50, 60] + ); + assert_eq!(r0.band(2).unwrap().shape(), &[3]); + assert_eq!( + &*r0.band(2).unwrap().contiguous_data().unwrap(), + &[100u8, 101, 102] ); - // Rebuild the top-level raster struct - let tampered_raster = StructArray::new( - RasterSchema::fields(), - vec![metadata_col, crs_col, Arc::new(tampered_bands_list)], - None, + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.num_bands(), 2); + assert_eq!(r1.band(0).unwrap().shape(), &[4]); + assert_eq!( + 
&*r1.band(0).unwrap().contiguous_data().unwrap(), + &[42u8, 43, 44, 45] + ); + assert_eq!(r1.band(1).unwrap().shape(), &[4]); + assert_eq!( + &*r1.band(1).unwrap().contiguous_data().unwrap(), + &[1u8, 2, 3, 4] ); - // Read back and verify that data_type() and storage_type() return Err - let rasters = RasterStructArray::new(&tampered_raster); - let raster = rasters.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - let band_meta = band.metadata(); + // Fast paths must honour the same offsets. + assert_eq!(r0.band_data_type(1), Some(BandDataType::UInt8)); + assert_eq!(r1.band_data_type(0), Some(BandDataType::UInt8)); + assert_eq!(r1.band_data_type(1), Some(BandDataType::UInt8)); + } - let storage_err = band_meta.storage_type().unwrap_err(); - assert!(storage_err.to_string().contains("Unknown storage type: 99")); + // ---- Important #10: null raster row, fast path ---- - let data_type_err = band_meta.data_type().unwrap_err(); - assert!(data_type_err - .to_string() - .contains("Unknown band data type: 99")); + #[test] + fn null_raster_row_fast_paths_return_none_after_non_null() { + // A non-null raster precedes the null one, so the underlying flat + // band arrays are non-empty. A naive fast path that forgets the + // bands_list.value_offsets() routing would return *raster 0's* + // band 0 metadata when asked for raster 1's band 0 — a real bug + // that a single-null-raster fixture cannot detect. 
+ let mut builder = RasterBuilder::new(2); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band( + Some("a"), + &["x"], + &[3], + BandDataType::UInt16, + Some(&[0xFFu8, 0xFE]), + Some("s3://bucket/a.tif"), + Some("GTiff"), + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 6]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.append_null().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + + // Sanity: raster 0 still resolves correctly. + let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.band_data_type(0), Some(BandDataType::UInt16)); + assert_eq!(r0.band_outdb_uri(0), Some("s3://bucket/a.tif")); + + // Raster 1 is null with zero bands. Every per-band lookup is + // out of range and must return None even though the flat + // underlying arrays still hold raster 0's data. + assert!(rasters.is_null(1)); + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.num_bands(), 0); + assert!(r1.band(0).is_none()); + assert!(r1.band_data_type(0).is_none()); + assert!(r1.band_outdb_uri(0).is_none()); + assert!(r1.band_outdb_format(0).is_none()); + assert!(r1.band_nodata(0).is_none()); } } diff --git a/rust/sedona-raster/src/builder.rs b/rust/sedona-raster/src/builder.rs index 3db236cb4..5e5e41368 100644 --- a/rust/sedona-raster/src/builder.rs +++ b/rust/sedona-raster/src/builder.rs @@ -17,325 +17,580 @@ use arrow_array::{ builder::{ - BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, StringBuilder, - StringViewBuilder, UInt32Builder, UInt64Builder, + ArrayBuilder, BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, + Int64Builder, StringBuilder, StringViewBuilder, UInt32Builder, UInt64Builder, }, Array, ArrayRef, ListArray, StructArray, }; -use arrow_buffer::{OffsetBuffer, ScalarBuffer}; -use arrow_schema::{ArrowError, DataType}; +use arrow_buffer::{NullBuffer, 
OffsetBuffer, ScalarBuffer}; +use arrow_schema::ArrowError; use std::sync::Arc; +use sedona_schema::raster::BandDataType; use sedona_schema::raster::RasterSchema; -use crate::traits::{BandMetadata, MetadataRef}; +use arrow_schema::DataType; -/// Builder for constructing raster arrays with zero-copy band data writing +/// Builder for constructing N-D raster arrays. /// -/// Required steps to build a raster: -/// 1. Create a RasterBuilder with a specified capacity -/// 2. For each raster to add: -/// - Call `start_raster` with the appropriate metadata, CRS -/// - For each band in the raster: -/// - Call `start_band` with the band metadata -/// - Use `band_data_writer` to get a BinaryViewBuilder and write the band data -/// - Call `finish_band` to complete the band -/// - Call `finish_raster` to complete the raster -/// 3. After all rasters are added, call `finish` to get the final StructArray +/// # Usage /// -/// Example usage: /// ``` -/// use sedona_raster::traits::{RasterMetadata, BandMetadata}; -/// use sedona_schema::raster::{StorageType, BandDataType}; /// use sedona_raster::builder::RasterBuilder; +/// use sedona_schema::raster::BandDataType; /// /// let mut builder = RasterBuilder::new(1); -/// let metadata = RasterMetadata { -/// width: 100, height: 100, -/// upperleft_x: 0.0, upperleft_y: 0.0, -/// scale_x: 1.0, scale_y: -1.0, -/// skew_x: 0.0, skew_y: 0.0, -/// }; -/// // Start a raster from RasterMetadata struct -/// builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); /// -/// // Add a band: -/// let band_metadata = BandMetadata { -/// nodata_value: Some(vec![0u8]), -/// storage_type: StorageType::InDb, -/// datatype: BandDataType::UInt8, -/// outdb_url: None, -/// outdb_band_id: None, -/// }; -/// builder.start_band(band_metadata).unwrap(); -/// let band_writer = builder.band_data_writer(); -/// band_writer.append_value(&vec![/* band data bytes */]); -/// builder.finish_band().unwrap(); +/// // 2D raster convenience: sets transform, 
spatial_dims=["x","y"], spatial_shape=[w,h] +/// builder.start_raster_2d(100, 100, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")).unwrap(); /// -/// // Finish the raster +/// // 2D band convenience: sets dim_names=["y","x"], shape=[h,w], contiguous strides +/// builder.start_band_2d(BandDataType::UInt8, Some(&[0u8])).unwrap(); +/// builder.band_data_writer().append_value(&vec![0u8; 10000]); +/// builder.finish_band().unwrap(); /// builder.finish_raster().unwrap(); /// -/// // Finish building and get the StructArray /// let raster_array = builder.finish().unwrap(); /// ``` pub struct RasterBuilder { - // Metadata fields - width: UInt64Builder, - height: UInt64Builder, - upper_left_x: Float64Builder, - upper_left_y: Float64Builder, - scale_x: Float64Builder, - scale_y: Float64Builder, - skew_x: Float64Builder, - skew_y: Float64Builder, - - // CRS field + // Top-level raster fields crs: StringViewBuilder, - - // Band metadata fields - band_nodata: BinaryBuilder, - band_storage_type: UInt32Builder, + transform_values: Float64Builder, + transform_offsets: Vec, + spatial_dims_values: StringViewBuilder, + spatial_dims_offsets: Vec, + spatial_shape_values: Int64Builder, + spatial_shape_offsets: Vec, + + // Band fields (flattened across all bands) + band_name: StringBuilder, + band_dim_names_values: StringBuilder, + band_dim_names_offsets: Vec, + band_shape_values: UInt64Builder, + band_shape_offsets: Vec, band_datatype: UInt32Builder, - band_outdb_url: StringBuilder, - band_outdb_band_id: UInt32Builder, - - // Band data field + band_nodata: BinaryBuilder, + // VIEW field — one entry per visible dimension per band. Stored as four + // parallel Int64 columns + a List offset vector; assembled into a + // `ListArray>` in `finish()`. + band_view_source_axis_values: Int64Builder, + band_view_start_values: Int64Builder, + band_view_step_values: Int64Builder, + band_view_steps_values: Int64Builder, + band_view_offsets: Vec, + // Per-band validity for the view list. 
`false` means the row is null — + // the canonical representation of an identity view. `true` means the row + // carries an explicit view in the four parallel value builders. + band_view_validity: Vec, + band_outdb_uri: StringBuilder, + band_outdb_format: StringViewBuilder, band_data: BinaryViewBuilder, // List structure tracking band_offsets: Vec, // Track where each raster's bands start/end current_band_count: i32, // Track bands in current raster - raster_validity: BooleanBuilder, // Track which rasters are null + // Current raster state (needed for start_band_2d) + current_width: u64, + current_height: u64, + + // Per-raster validation state: spatial dims/shape and recorded bands so + // finish_raster can check every band matches the top-level spatial grid. + current_spatial_dims: Vec, + current_spatial_shape: Vec, + current_raster_bands: Vec<(Vec, Vec)>, + + // Track band_data count at the start of each band for finish_band validation + band_data_count_at_start: usize, + + raster_validity: BooleanBuilder, } impl RasterBuilder { - /// Create a new raster builder with the specified capacity + /// Create a new raster builder with the specified capacity. pub fn new(capacity: usize) -> Self { Self { - // Metadata builders - width: UInt64Builder::with_capacity(capacity), - height: UInt64Builder::with_capacity(capacity), - upper_left_x: Float64Builder::with_capacity(capacity), - upper_left_y: Float64Builder::with_capacity(capacity), - scale_x: Float64Builder::with_capacity(capacity), - scale_y: Float64Builder::with_capacity(capacity), - skew_x: Float64Builder::with_capacity(capacity), - skew_y: Float64Builder::with_capacity(capacity), - - // CRS builder crs: StringViewBuilder::with_capacity(capacity), - - // Band builders - estimate some bands per raster - // The capacity is at raster level, but each raster has multiple bands and - // are large. 
We may want to add an optional parameter to control expected - // bands per raster or even band size in the future - band_nodata: BinaryBuilder::with_capacity(capacity, capacity), - band_storage_type: UInt32Builder::with_capacity(capacity), + transform_values: Float64Builder::with_capacity(capacity * 6), + transform_offsets: vec![0], + spatial_dims_values: StringViewBuilder::with_capacity(capacity * 2), + spatial_dims_offsets: vec![0], + spatial_shape_values: Int64Builder::with_capacity(capacity * 2), + spatial_shape_offsets: vec![0], + + band_name: StringBuilder::with_capacity(capacity, capacity), + band_dim_names_values: StringBuilder::with_capacity(capacity * 2, capacity * 4), + band_dim_names_offsets: vec![0], + band_shape_values: UInt64Builder::with_capacity(capacity * 2), + band_shape_offsets: vec![0], band_datatype: UInt32Builder::with_capacity(capacity), - band_outdb_url: StringBuilder::with_capacity(capacity, capacity), - band_outdb_band_id: UInt32Builder::with_capacity(capacity), + band_nodata: BinaryBuilder::with_capacity(capacity, capacity), + band_view_source_axis_values: Int64Builder::with_capacity(capacity * 2), + band_view_start_values: Int64Builder::with_capacity(capacity * 2), + band_view_step_values: Int64Builder::with_capacity(capacity * 2), + band_view_steps_values: Int64Builder::with_capacity(capacity * 2), + band_view_offsets: vec![0], + band_view_validity: Vec::with_capacity(capacity), + band_outdb_uri: StringBuilder::with_capacity(capacity, capacity), + band_outdb_format: StringViewBuilder::with_capacity(capacity), band_data: BinaryViewBuilder::with_capacity(capacity), - // List tracking band_offsets: vec![0], current_band_count: 0, + current_width: 0, + current_height: 0, + + current_spatial_dims: Vec::new(), + current_spatial_shape: Vec::new(), + current_raster_bands: Vec::new(), + + band_data_count_at_start: 0, - // Raster-level validity (keeps track of null rasters) raster_validity: BooleanBuilder::with_capacity(capacity), } } - /// 
Start a new raster with metadata and optional CRS + /// Start a new raster with explicit N-D parameters. + /// + /// `transform` must be a 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + /// + /// `spatial_dims` names the raster-level spatial dimensions (today always + /// length 2, e.g. `["x","y"]`). `spatial_shape` gives their sizes in the + /// same order. Every band added to this raster must contain each name in + /// `spatial_dims` within its own `dim_names`, with matching size. pub fn start_raster( &mut self, - metadata: &dyn MetadataRef, + transform: &[f64; 6], + spatial_dims: &[&str], + spatial_shape: &[i64], crs: Option<&str>, ) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata)?; - self.append_crs(crs)?; + if spatial_dims.len() != spatial_shape.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "spatial_dims.len() ({}) must equal spatial_shape.len() ({})", + spatial_dims.len(), + spatial_shape.len() + ))); + } + + // Transform + for &v in transform { + self.transform_values.append_value(v); + } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); + + // Spatial dims + shape + for d in spatial_dims { + self.spatial_dims_values.append_value(d); + } + let next = *self.spatial_dims_offsets.last().unwrap() + spatial_dims.len() as i32; + self.spatial_dims_offsets.push(next); + + for &s in spatial_shape { + self.spatial_shape_values.append_value(s); + } + let next = *self.spatial_shape_offsets.last().unwrap() + spatial_shape.len() as i32; + self.spatial_shape_offsets.push(next); + + // CRS + match crs { + Some(crs_data) => self.crs.append_value(crs_data), + None => self.crs.append_null(), + } - // Reset band count for this raster self.current_band_count = 0; + self.current_spatial_dims = spatial_dims.iter().map(|s| s.to_string()).collect(); + self.current_spatial_shape = spatial_shape.to_vec(); + self.current_raster_bands.clear(); + // 
Preserve legacy current_width/current_height for start_band_2d (set + // by start_raster_2d). Callers using this direct entry point drive + // their own shapes via start_band. + self.current_width = 0; + self.current_height = 0; + + Ok(()) + } + /// Convenience: start a 2D raster with the legacy 8-parameter interface. + /// + /// Sets `spatial_dims=["x","y"]`, `spatial_shape=[width, height]`, and + /// builds the 6-element GDAL transform from the individual parameters. + #[allow(clippy::too_many_arguments)] + pub fn start_raster_2d( + &mut self, + width: u64, + height: u64, + origin_x: f64, + origin_y: f64, + scale_x: f64, + scale_y: f64, + skew_x: f64, + skew_y: f64, + crs: Option<&str>, + ) -> Result<(), ArrowError> { + let transform = [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]; + self.start_raster(&transform, &["x", "y"], &[width as i64, height as i64], crs)?; + self.current_width = width; + self.current_height = height; Ok(()) } - /// Start a new band - this must be called before writing band data - pub fn start_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - // Append band metadata - match band_metadata.nodata_value { - Some(nodata) => self.band_nodata.append_value(&nodata), + /// Start a new band with explicit N-D parameters. + /// + /// `outdb_uri` is the *location* of the external resource (scheme is + /// resolved by an `ObjectStoreRegistry`). `outdb_format` is the *format* + /// used to interpret the bytes at that location (e.g. `"geotiff"`, + /// `"zarr"`). A null `outdb_format` means the band is in-memory — the + /// band's `data` buffer is authoritative. 
+ #[allow(clippy::too_many_arguments)] + pub fn start_band( + &mut self, + name: Option<&str>, + dim_names: &[&str], + shape: &[u64], + data_type: BandDataType, + nodata: Option<&[u8]>, + outdb_uri: Option<&str>, + outdb_format: Option<&str>, + ) -> Result<(), ArrowError> { + if dim_names.is_empty() { + return Err(ArrowError::InvalidArgumentError( + "start_band: 0-dimensional bands are not supported".into(), + )); + } + if dim_names.len() != shape.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "start_band: dim_names ({}) and shape ({}) must have the same length", + dim_names.len(), + shape.len(), + ))); + } + // Name + match name { + Some(n) => self.band_name.append_value(n), + None => self.band_name.append_null(), + } + + // Dim names + for dn in dim_names { + self.band_dim_names_values.append_value(dn); + } + let next = *self.band_dim_names_offsets.last().unwrap() + dim_names.len() as i32; + self.band_dim_names_offsets.push(next); + + // Shape + for &s in shape { + self.band_shape_values.append_value(s); + } + let next = *self.band_shape_offsets.last().unwrap() + shape.len() as i32; + self.band_shape_offsets.push(next); + + // Data type + self.band_datatype.append_value(data_type as u32); + + // Nodata + match nodata { + Some(nodata_bytes) => self.band_nodata.append_value(nodata_bytes), None => self.band_nodata.append_null(), } - self.band_storage_type - .append_value(band_metadata.storage_type as u32); - self.band_datatype - .append_value(band_metadata.datatype as u32); + // VIEW: canonical identity is encoded as a null list entry — no + // values appended, offset unchanged, validity bit cleared. 
+ let next = *self.band_view_offsets.last().unwrap(); + self.band_view_offsets.push(next); + self.band_view_validity.push(false); - match band_metadata.outdb_url { - Some(url) => self.band_outdb_url.append_value(&url), - None => self.band_outdb_url.append_null(), + // OutDb URI + match outdb_uri { + Some(uri) => self.band_outdb_uri.append_value(uri), + None => self.band_outdb_uri.append_null(), } - match band_metadata.outdb_band_id { - Some(band_id) => self.band_outdb_band_id.append_value(band_id), - None => self.band_outdb_band_id.append_null(), + // OutDb format + match outdb_format { + Some(format) => self.band_outdb_format.append_value(format), + None => self.band_outdb_format.append_null(), } self.current_band_count += 1; + self.band_data_count_at_start = self.band_data.len(); + + // Record this band's dims/shape for strict validation at finish_raster. + self.current_raster_bands.push(( + dim_names.iter().map(|s| s.to_string()).collect(), + shape.to_vec(), + )); Ok(()) } - /// Get direct access to the BinaryViewBuilder for writing the current band's data - /// Must be called after start_band() to write data to the current band + /// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`. + /// + /// Must be called after `start_raster_2d` which sets the current width/height. + pub fn start_band_2d( + &mut self, + data_type: BandDataType, + nodata: Option<&[u8]>, + ) -> Result<(), ArrowError> { + if self.current_width == 0 && self.current_height == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band_2d requires prior start_raster_2d (width and height are 0)".into(), + )); + } + self.start_band( + None, + &["y", "x"], + &[self.current_height, self.current_width], + data_type, + nodata, + None, + None, + ) + } + + /// Get direct access to the BinaryViewBuilder for writing the current band's data. 
pub fn band_data_writer(&mut self) -> &mut BinaryViewBuilder { &mut self.band_data } - /// Finish writing the current band + /// Finish writing the current band. + /// + /// Validates that exactly one data value was appended since `start_band()`. pub fn finish_band(&mut self) -> Result<(), ArrowError> { - // Band data should already be written via band_data_writer - // Nothing additional needed here since we're building flat + let current_count = self.band_data.len(); + if current_count != self.band_data_count_at_start + 1 { + return Err(ArrowError::InvalidArgumentError( + format!( + "Expected exactly one band data value per band, but got {} appended since start_band()", + current_count - self.band_data_count_at_start + ), + )); + } Ok(()) } - /// Finish all bands for the current raster + /// Finish all bands for the current raster. + /// + /// Strictly validates every band added since `start_raster`: each name in + /// the top-level `spatial_dims` must appear in the band's own `dim_names` + /// with a size matching the corresponding entry in `spatial_shape`. 
pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - // Record the end offset for this raster's bands + for (band_idx, (band_dims, band_shape)) in self.current_raster_bands.iter().enumerate() { + for (spatial_idx, spatial_dim) in self.current_spatial_dims.iter().enumerate() { + let pos = band_dims + .iter() + .position(|d| d == spatial_dim) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "Band {band_idx} is missing spatial dimension {spatial_dim:?} \ + (band dim_names = {band_dims:?})" + )) + })?; + let expected = self.current_spatial_shape[spatial_idx]; + let actual = band_shape[pos] as i64; + if actual != expected { + return Err(ArrowError::InvalidArgumentError(format!( + "Band {band_idx} dimension {spatial_dim:?} has size {actual}, \ + expected {expected} from top-level spatial_shape" + ))); + } + } + } + let next_offset = self.band_offsets.last().unwrap() + self.current_band_count; self.band_offsets.push(next_offset); - self.raster_validity.append_value(true); - + self.current_raster_bands.clear(); + self.current_spatial_dims.clear(); + self.current_spatial_shape.clear(); Ok(()) } - /// Append raster metadata from a MetadataRef trait object - fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - self.width.append_value(metadata.width()); - self.height.append_value(metadata.height()); - self.upper_left_x.append_value(metadata.upper_left_x()); - self.upper_left_y.append_value(metadata.upper_left_y()); - self.scale_x.append_value(metadata.scale_x()); - self.scale_y.append_value(metadata.scale_y()); - self.skew_x.append_value(metadata.skew_x()); - self.skew_y.append_value(metadata.skew_y()); - - Ok(()) - } - - /// Set the CRS for the current raster - pub fn append_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - match crs { - Some(crs_data) => self.crs.append_value(crs_data), - None => self.crs.append_null(), + /// Append a null raster. 
+ pub fn append_null(&mut self) -> Result<(), ArrowError> { + // Transform: append 6 zeros + for _ in 0..6 { + self.transform_values.append_value(0.0); } - Ok(()) - } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); - /// Append a null raster - pub fn append_null(&mut self) -> Result<(), ArrowError> { - // Since metadata fields are non-nullable, provide default values - self.width.append_value(0u64); - self.height.append_value(0u64); - self.upper_left_x.append_value(0.0f64); - self.upper_left_y.append_value(0.0f64); - self.scale_x.append_value(0.0f64); - self.scale_y.append_value(0.0f64); - self.skew_x.append_value(0.0f64); - self.skew_y.append_value(0.0f64); - - // Append null CRS + // Spatial dims + shape: empty list for null rasters. + let next = *self.spatial_dims_offsets.last().unwrap(); + self.spatial_dims_offsets.push(next); + let next = *self.spatial_shape_offsets.last().unwrap(); + self.spatial_shape_offsets.push(next); + + // CRS: null self.crs.append_null(); - // No bands for null raster + // No bands let current_offset = *self.band_offsets.last().unwrap(); self.band_offsets.push(current_offset); - // Mark raster as null + // Mark null self.raster_validity.append_null(); Ok(()) } - /// Finish building and return the constructed StructArray + /// Finish building and return the constructed StructArray. 
pub fn finish(mut self) -> Result { - // Build the metadata struct using the schema - let metadata_fields = if let DataType::Struct(fields) = RasterSchema::metadata_type() { - fields - } else { + // Build transform list + let transform_values = self.transform_values.finish(); + let transform_offsets = OffsetBuffer::new(ScalarBuffer::from(self.transform_offsets)); + let DataType::List(transform_field) = RasterSchema::transform_type() else { return Err(ArrowError::SchemaError( - "Expected struct type for metadata".to_string(), + "Expected list type for transform".to_string(), )); }; + let transform_list = ListArray::new( + transform_field, + transform_offsets, + Arc::new(transform_values), + None, + ); - let metadata_arrays: Vec = vec![ - Arc::new(self.width.finish()), - Arc::new(self.height.finish()), - Arc::new(self.upper_left_x.finish()), - Arc::new(self.upper_left_y.finish()), - Arc::new(self.scale_x.finish()), - Arc::new(self.scale_y.finish()), - Arc::new(self.skew_x.finish()), - Arc::new(self.skew_y.finish()), - ]; - let metadata_array = StructArray::new(metadata_fields, metadata_arrays, None); - - // Build the band metadata struct using the schema - let band_metadata_fields = - if let DataType::Struct(fields) = RasterSchema::band_metadata_type() { - fields - } else { - return Err(ArrowError::SchemaError( - "Expected struct type for band metadata".to_string(), - )); - }; - - let band_metadata_arrays: Vec = vec![ - Arc::new(self.band_nodata.finish()), - Arc::new(self.band_storage_type.finish()), - Arc::new(self.band_datatype.finish()), - Arc::new(self.band_outdb_url.finish()), - Arc::new(self.band_outdb_band_id.finish()), - ]; - let band_metadata_array = - StructArray::new(band_metadata_fields, band_metadata_arrays, None); + // Build spatial_dims list + let spatial_dims_values = self.spatial_dims_values.finish(); + let spatial_dims_offsets = OffsetBuffer::new(ScalarBuffer::from(self.spatial_dims_offsets)); + let DataType::List(spatial_dims_field) = 
RasterSchema::spatial_dims_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for spatial_dims".to_string(), + )); + }; + let spatial_dims_list = ListArray::new( + spatial_dims_field, + spatial_dims_offsets, + Arc::new(spatial_dims_values), + None, + ); + + // Build spatial_shape list + let spatial_shape_values = self.spatial_shape_values.finish(); + let spatial_shape_offsets = + OffsetBuffer::new(ScalarBuffer::from(self.spatial_shape_offsets)); + let DataType::List(spatial_shape_field) = RasterSchema::spatial_shape_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for spatial_shape".to_string(), + )); + }; + let spatial_shape_list = ListArray::new( + spatial_shape_field, + spatial_shape_offsets, + Arc::new(spatial_shape_values), + None, + ); + + // Build band dim_names nested list + let dim_names_values = self.band_dim_names_values.finish(); + let dim_names_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_dim_names_offsets)); + let DataType::List(dim_names_field) = RasterSchema::dim_names_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for dim_names".to_string(), + )); + }; + let dim_names_list = ListArray::new( + dim_names_field, + dim_names_offsets, + Arc::new(dim_names_values), + None, + ); + + // Build band source_shape nested list + let source_shape_values = self.band_shape_values.finish(); + let source_shape_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_shape_offsets)); + let DataType::List(source_shape_field) = RasterSchema::source_shape_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for source_shape".to_string(), + )); + }; + let source_shape_list = ListArray::new( + source_shape_field, + source_shape_offsets, + Arc::new(source_shape_values), + None, + ); - // Build the band struct using the schema - let band_fields = if let DataType::Struct(fields) = RasterSchema::band_type() { - fields + // Build band view nested list (List>). 
+ let view_source_axis = self.band_view_source_axis_values.finish(); + let view_start = self.band_view_start_values.finish(); + let view_step = self.band_view_step_values.finish(); + let view_steps = self.band_view_steps_values.finish(); + let view_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_view_offsets)); + let DataType::List(view_list_field) = RasterSchema::view_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for view".to_string(), + )); + }; + let DataType::Struct(view_struct_fields) = view_list_field.data_type().clone() else { + return Err(ArrowError::SchemaError( + "Expected struct type inside view list".to_string(), + )); + }; + let view_struct = StructArray::new( + view_struct_fields, + vec![ + Arc::new(view_source_axis) as ArrayRef, + Arc::new(view_start) as ArrayRef, + Arc::new(view_step) as ArrayRef, + Arc::new(view_steps) as ArrayRef, + ], + None, + ); + let view_nulls = if self.band_view_validity.iter().all(|&b| b) { + None } else { + Some(NullBuffer::from_iter( + self.band_view_validity.iter().copied(), + )) + }; + let view_list = ListArray::new( + view_list_field, + view_offsets, + Arc::new(view_struct), + view_nulls, + ); + + // Build band struct + let DataType::Struct(band_fields) = RasterSchema::band_type() else { return Err(ArrowError::SchemaError( "Expected struct type for band".to_string(), )); }; let band_arrays: Vec = vec![ - Arc::new(band_metadata_array), + Arc::new(self.band_name.finish()), + Arc::new(dim_names_list), + Arc::new(source_shape_list), + Arc::new(self.band_datatype.finish()), + Arc::new(self.band_nodata.finish()), + Arc::new(view_list), + Arc::new(self.band_outdb_uri.finish()), + Arc::new(self.band_outdb_format.finish()), Arc::new(self.band_data.finish()), ]; - let band_struct_array = StructArray::new(band_fields, band_arrays, None); + let band_struct = StructArray::new(band_fields, band_arrays, None); - // Build the bands list array using the schema - let band_field = if let 
DataType::List(field) = RasterSchema::bands_type() { - field - } else { + // Build bands list + let DataType::List(bands_field) = RasterSchema::bands_type() else { return Err(ArrowError::SchemaError( "Expected list type for bands".to_string(), )); }; + let band_list_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); + let bands_list = + ListArray::new(bands_field, band_list_offsets, Arc::new(band_struct), None); - let offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); - let bands_list = ListArray::new(band_field, offsets, Arc::new(band_struct_array), None); - - // Build the final raster struct using the schema + // Build top-level raster struct let raster_fields = RasterSchema::fields(); let raster_arrays: Vec = vec![ - Arc::new(metadata_array), Arc::new(self.crs.finish()), + Arc::new(transform_list), + Arc::new(spatial_dims_list), + Arc::new(spatial_shape_list), Arc::new(bands_list), ]; @@ -350,500 +605,765 @@ impl RasterBuilder { mod tests { use super::*; use crate::array::RasterStructArray; - use crate::traits::{RasterMetadata, RasterRef}; - use sedona_schema::raster::{BandDataType, StorageType}; + use crate::traits::RasterRef; #[test] - fn test_iterator_basic_functionality() { - // Create a simple raster for testing using the correct API - let mut builder = RasterBuilder::new(10); // capacity - - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + fn test_roundtrip_2d_raster() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + 10, + 20, + 100.0, + 200.0, + 1.0, + -2.0, + 0.25, + 0.5, + Some("EPSG:4326"), + ) + .unwrap(); + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value(vec![1u8; 200]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let epsg4326 = "EPSG:4326"; - builder.start_raster(&metadata, 
Some(epsg4326)).unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + assert_eq!(rasters.len(), 1); - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + let r = rasters.get(0).unwrap(); + assert_eq!(r.width(), Some(10)); + assert_eq!(r.height(), Some(20)); + assert_eq!(r.transform(), &[100.0, 1.0, 0.25, 200.0, 0.5, -2.0]); + assert_eq!(r.x_dim(), "x"); + assert_eq!(r.y_dim(), "y"); + assert_eq!(r.crs(), Some("EPSG:4326")); + assert_eq!(r.num_bands(), 1); + + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[20, 10]); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.nodata(), Some(&[255u8][..])); + assert_eq!(band.contiguous_data().unwrap().len(), 200); + } - // Add a single band with some test data using the correct API - builder.start_band(band_metadata.clone()).unwrap(); - let test_data = vec![1u8; 100]; // 10x10 raster with value 1 - builder.band_data_writer().append_value(&test_data); + #[test] + fn test_roundtrip_multi_band() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(2, 2, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + + // Band 0: UInt8 + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value([1u8, 2, 3, 4]); + builder.finish_band().unwrap(); + + // Band 1: Float32 + builder.start_band_2d(BandDataType::Float32, None).unwrap(); + let f32_data: Vec = [1.5f32, 2.5, 3.5, 4.5] + .iter() + .flat_map(|v| v.to_le_bytes()) + .collect(); + builder.band_data_writer().append_value(&f32_data); builder.finish_band().unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - let raster_array = builder.finish().unwrap(); + builder.finish_raster().unwrap(); + let array = 
builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - // Test the iterator - let rasters = RasterStructArray::new(&raster_array); + assert_eq!(r.num_bands(), 2); - assert_eq!(rasters.len(), 1); - assert!(!rasters.is_empty()); + let b0 = r.band(0).unwrap(); + assert_eq!(b0.data_type(), BandDataType::UInt8); + assert_eq!(b0.nodata(), Some(&[255u8][..])); - let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); + let b1 = r.band(1).unwrap(); + assert_eq!(b1.data_type(), BandDataType::Float32); + assert_eq!(b1.nodata(), None); + } - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 10); - assert_eq!(metadata.scale_x(), 1.0); - assert_eq!(metadata.scale_y(), -1.0); + #[test] + fn test_null_raster() { + let mut builder = RasterBuilder::new(2); + builder + .start_raster_2d(1, 1, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([0u8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let bands = raster.bands(); - assert_eq!(bands.len(), 1); - assert!(!bands.is_empty()); + builder.append_null().unwrap(); - // Access band with 1-based band_number - let band = bands.band(1).unwrap(); - assert_eq!(band.data().len(), 100); - assert_eq!(band.data()[0], 1u8); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + assert_eq!(rasters.len(), 2); + assert!(!rasters.is_null(0)); + assert!(rasters.is_null(1)); + } - let band_meta = band.metadata(); - assert_eq!(band_meta.storage_type().unwrap(), StorageType::InDb); - assert_eq!(band_meta.data_type().unwrap(), BandDataType::UInt8); + #[test] + fn test_nd_band() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); - let crs = raster.crs().unwrap(); - assert_eq!(crs, 
epsg4326); + // 3D band: [time=3, y=4, x=5] + builder + .start_band( + Some("temperature"), + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 3 * 4 * 5 * 4]; // 3*4*5 Float32 elements + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Test iterator over bands - let band_iter: Vec<_> = bands.iter().collect(); - assert_eq!(band_iter.len(), 1); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.band_name(0), Some("temperature")); + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band.shape(), &[3, 4, 5]); + assert_eq!(band.dim_size("time"), Some(3)); + assert_eq!(band.dim_size("y"), Some(4)); + assert_eq!(band.dim_size("x"), Some(5)); + assert_eq!(band.dim_size("z"), None); + + // Verify strides are standard C-order: [4*5*4, 5*4, 4] = [80, 20, 4] + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[80, 20, 4]); + assert_eq!(buf.offset, 0); } #[test] - fn test_multi_band_iterator() { - let mut builder = RasterBuilder::new(3); + fn test_nonstandard_spatial_dim_names() { + // Zarr-style dataset with lat/lon instead of y/x + let mut builder = RasterBuilder::new(1); + let transform = [10.0, 0.01, 0.0, 50.0, 0.0, -0.01]; + builder + .start_raster( + &transform, + &["longitude", "latitude"], + &[360, 180], + Some("EPSG:4326"), + ) + .unwrap(); + builder + .start_band( + Some("sst"), + &["latitude", "longitude"], + &[180, 360], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 180 * 360 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let metadata = RasterMetadata { - width: 5, - height: 5, - upperleft_x: 0.0, - upperleft_y: 0.0, 
- scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - builder.start_raster(&metadata, None).unwrap(); - - // Add three bands using the correct API - for band_idx in 0..3 { - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - builder.start_band(band_metadata).unwrap(); - let test_data = vec![band_idx as u8; 25]; // 5x5 raster - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } + assert_eq!(r.x_dim(), "longitude"); + assert_eq!(r.y_dim(), "latitude"); + // width = size of "longitude" dim, height = size of "latitude" dim + assert_eq!(r.width(), Some(360)); + assert_eq!(r.height(), Some(180)); + } + + #[test] + fn test_mixed_dimensionality_bands() { + // One 3D band and one 2D band in the same raster + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[64, 64], None) + .unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); + // Band 0: 3D [time=12, y=64, x=64] + builder + .start_band( + Some("temperature"), + &["time", "y", "x"], + &[12, 64, 64], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data_3d = vec![0u8; 12 * 64 * 64 * 4]; + builder.band_data_writer().append_value(&data_3d); + builder.finish_band().unwrap(); - let raster_array = builder.finish().unwrap(); + // Band 1: 2D [y=64, x=64] + builder + .start_band( + Some("elevation"), + &["y", "x"], + &[64, 64], + BandDataType::Float64, + None, + None, + None, + ) + .unwrap(); + let data_2d = vec![0u8; 64 * 64 * 8]; + builder.band_data_writer().append_value(&data_2d); + builder.finish_band().unwrap(); - let rasters = RasterStructArray::new(&raster_array); - let 
raster = rasters.get(0).unwrap(); - let bands = raster.bands(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 2); + // width/height derived from band(0) which is 3D + assert_eq!(r.width(), Some(64)); + assert_eq!(r.height(), Some(64)); + + let b0 = r.band(0).unwrap(); + assert_eq!(b0.ndim(), 3); + assert_eq!(b0.dim_names(), vec!["time", "y", "x"]); + assert_eq!(b0.shape(), &[12, 64, 64]); + assert_eq!(b0.dim_size("time"), Some(12)); + + let b1 = r.band(1).unwrap(); + assert_eq!(b1.ndim(), 2); + assert_eq!(b1.dim_names(), vec!["y", "x"]); + assert_eq!(b1.shape(), &[64, 64]); + assert_eq!(b1.dim_size("time"), None); + } - assert_eq!(bands.len(), 3); + #[test] + fn test_dim_index_lookup() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[32, 32], None) + .unwrap(); + builder + .start_band( + None, + &["time", "pressure", "y", "x"], + &[6, 10, 32, 32], + BandDataType::Float32, + None, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 6 * 10 * 32 * 32 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Test each band has different data - // Use 1-based band numbers - for i in 0..3 { - // Access band with 1-based band_number - let band = bands.band(i + 1).unwrap(); - let expected_value = i as u8; - assert!(band.data().iter().all(|&x| x == expected_value)); - } + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); - // Test iterator - let band_values: Vec = bands - .iter() - .enumerate() - .map(|(i, band)| { - assert_eq!(band.data()[0], i as u8); - band.data()[0] - }) - .collect(); + assert_eq!(band.dim_index("time"), Some(0)); + 
assert_eq!(band.dim_index("pressure"), Some(1)); + assert_eq!(band.dim_index("y"), Some(2)); + assert_eq!(band.dim_index("x"), Some(3)); + assert_eq!(band.dim_index("wavelength"), None); - assert_eq!(band_values, vec![0, 1, 2]); + assert_eq!(band.dim_size("time"), Some(6)); + assert_eq!(band.dim_size("pressure"), Some(10)); + assert_eq!(band.dim_size("wavelength"), None); } #[test] - fn test_copy_metadata_from_iterator() { - // Create an original raster - let mut source_builder = RasterBuilder::new(10); - - let original_metadata = RasterMetadata { - width: 42, - height: 24, - upperleft_x: -122.0, - upperleft_y: 37.8, - scale_x: 0.1, - scale_y: -0.1, - skew_x: 0.0, - skew_y: 0.0, - }; + fn test_contiguous_data_is_borrowed() { + use std::borrow::Cow; - source_builder - .start_raster(&original_metadata, None) + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([1u8; 16]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let data = band.contiguous_data().unwrap(); + // Identity-view bands are always contiguous, so should be Cow::Borrowed + assert!(matches!(data, Cow::Borrowed(_))); + assert_eq!(data.len(), 16); + } - source_builder.start_band(band_metadata).unwrap(); - let test_data = vec![42u8; 1008]; // 42x24 raster - source_builder.band_data_writer().append_value(&test_data); - source_builder.finish_band().unwrap(); - source_builder.finish_raster().unwrap(); + #[test] + fn test_nd_buffer_strides_various_types() { + // Each raster 
exercises a different shape; strict spatial-grid + // validation forbids mixing bands of disagreeing spatial sizes within + // one raster. + let mut builder = RasterBuilder::new(3); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; - let source_array = source_builder.finish().unwrap(); + // Raster 0 — UInt8: element size = 1, shape [3, 4] → strides [4, 1] + builder + .start_raster(&transform, &["x", "y"], &[4, 3], None) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[3, 4], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 12]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Create a new raster using metadata from the iterator - let mut target_builder = RasterBuilder::new(10); - let iterator = RasterStructArray::new(&source_array); - let source_raster = iterator.get(0).unwrap(); + // Raster 1 — Float64: element size = 8, shape [2, 3, 5] → strides [120, 40, 8] + builder + .start_raster(&transform, &["x", "y"], &[5, 3], None) + .unwrap(); + builder + .start_band( + None, + &["z", "y", "x"], + &[2, 3, 5], + BandDataType::Float64, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 2 * 3 * 5 * 8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - target_builder - .start_raster(source_raster.metadata(), source_raster.crs()) + // Raster 2 — UInt16: element size = 2, shape [10] → strides [2]. + // Only has an "x" dim, so declare spatial_dims=["x"]. 
+ builder + .start_raster(&transform, &["x"], &[10], None) .unwrap(); + builder + .start_band(None, &["x"], &[10], BandDataType::UInt16, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 20]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Add new band data while preserving original metadata - let new_band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt16, - outdb_url: None, - outdb_band_id: None, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); - target_builder.start_band(new_band_metadata).unwrap(); - let new_data = vec![100u16; 1008]; // Different data, same dimensions - let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); + let r0 = rasters.get(0).unwrap(); + let b0 = r0.band(0).unwrap(); + assert_eq!(b0.nd_buffer().unwrap().strides, &[4, 1]); // UInt8 [3, 4] - target_builder - .band_data_writer() - .append_value(&new_data_bytes); - target_builder.finish_band().unwrap(); - target_builder.finish_raster().unwrap(); - - let target_array = target_builder.finish().unwrap(); - - // Verify the metadata was copied correctly - let target_iterator = RasterStructArray::new(&target_array); - let target_raster = target_iterator.get(0).unwrap(); - let target_metadata = target_raster.metadata(); - - // All metadata should match the original - assert_eq!(target_metadata.width(), 42); - assert_eq!(target_metadata.height(), 24); - assert_eq!(target_metadata.upper_left_x(), -122.0); - assert_eq!(target_metadata.upper_left_y(), 37.8); - assert_eq!(target_metadata.scale_x(), 0.1); - assert_eq!(target_metadata.scale_y(), -0.1); - - // But band data and metadata should be different - let target_band = target_raster.bands().band(1).unwrap(); - let target_band_meta = target_band.metadata(); - assert_eq!(target_band_meta.data_type().unwrap(), BandDataType::UInt16); - 
assert!(target_band_meta.nodata_value().is_none()); - assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 - - let result = target_raster.bands().band(0); - assert!(result.is_err(), "Band number 0 should be invalid"); - - let result = target_raster.bands().band(2); - assert!(result.is_err(), "Band number 2 should be out of range"); + let r1 = rasters.get(1).unwrap(); + let b1 = r1.band(0).unwrap(); + assert_eq!(b1.nd_buffer().unwrap().strides, &[120, 40, 8]); // Float64 [2, 3, 5] + + let r2 = rasters.get(2).unwrap(); + let b2 = r2.band(0).unwrap(); + assert_eq!(b2.nd_buffer().unwrap().strides, &[2]); // UInt16 [10] } #[test] - fn test_band_data_types() { - // Create a test raster with bands of different data types + fn test_width_height_no_bands() { + // Zero-band raster — used as a "target grid" specification (GDAL warp + // pattern). Width/height come from the top-level spatial_shape, not + // band(0). let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[64, 32], None) + .unwrap(); + builder.finish_raster().unwrap(); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 0); + assert_eq!(r.width(), Some(64)); + assert_eq!(r.height(), Some(32)); + } + + #[test] + fn test_band_name_nullable() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); - builder.start_raster(&metadata, None).unwrap(); - - // Test all BandDataType variants - let test_cases = vec![ - (BandDataType::UInt8, vec![1u8, 2u8, 3u8, 4u8]), - (BandDataType::Int8, vec![255u8, 254u8, 253u8, 252u8]), // -1, -2, -3, 
-4 as i8 - ( - BandDataType::UInt16, - vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8], - ), // little-endian u16 - ( - BandDataType::Int16, - vec![255u8, 255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8], - ), // little-endian i16 - ( - BandDataType::UInt32, - vec![ - 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, - ], - ), // little-endian u32 - ( - BandDataType::Int32, - vec![ - 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, - 255u8, 252u8, 255u8, 255u8, 255u8, - ], - ), // little-endian i32 - ( - BandDataType::UInt64, - vec![ - 1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - ], - ), // little-endian u64 - ( - BandDataType::Int64, - vec![ - 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, - 255u8, 255u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, 255u8, 255u8, 255u8, - 255u8, 255u8, 252u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, - ], - ), // little-endian i64: -1, -2, -3, -4 - ( + // Named band + builder + .start_band( + Some("temperature"), + &["y", "x"], + &[4, 4], BandDataType::Float32, - vec![ - 0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, - 128u8, 64u8, - ], - ), // little-endian f32: 1.0, 2.0, 3.0, 4.0 - ( - BandDataType::Float64, - vec![ - 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 16u8, 64u8, - ], - ), // little-endian f64: 1.0, 2.0, 3.0, 4.0 - ]; + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 64]); + builder.finish_band().unwrap(); - for (expected_data_type, test_data) in test_cases { - let band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: expected_data_type, - outdb_url: None, - outdb_band_id: None, - }; - 
- builder.start_band(band_metadata).unwrap(); - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } + // Unnamed band (via start_band_2d which passes None for name) + builder.current_width = 4; + builder.current_height = 4; + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value(vec![0u8; 16]); + builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let raster_array = builder.finish().unwrap(); - - // Test the data type conversion for each band - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 10, "Expected 10 bands for all data types"); - - // Verify each band returns the correct data type - let expected_types = [ - BandDataType::UInt8, - BandDataType::Int8, - BandDataType::UInt16, - BandDataType::Int16, - BandDataType::UInt32, - BandDataType::Int32, - BandDataType::UInt64, - BandDataType::Int64, - BandDataType::Float32, - BandDataType::Float64, - ]; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - // i is zero-based index - for (i, expected_type) in expected_types.iter().enumerate() { - // Bands are 1-based band_number - let band = bands.band(i + 1).unwrap(); - let band_metadata = band.metadata(); - let actual_type = band_metadata.data_type().unwrap(); - - assert_eq!( - actual_type, *expected_type, - "Band {i} expected data type {expected_type:?}, got {actual_type:?}" - ); - } + assert_eq!(r.band_name(0), Some("temperature")); + assert_eq!(r.band_name(1), None); // unnamed + assert_eq!(r.band_name(99), None); // out of range } #[test] - fn test_outdb_metadata_fields() { - // Test creating raster with OutDb reference metadata - let mut builder = RasterBuilder::new(10); - - let metadata = RasterMetadata { - width: 1024, - height: 1024, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 
1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + fn test_spatial_dims_shape_roundtrip() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["longitude", "latitude"], &[360, 180], None) + .unwrap(); + builder + .start_band( + None, + &["latitude", "longitude"], + &[180, 360], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 360 * 180]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - builder.start_raster(&metadata, None).unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - // Test InDb band (should have null OutDb fields) - let indb_band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + assert_eq!(r.spatial_dims(), vec!["longitude", "latitude"]); + assert_eq!(r.spatial_shape(), &[360, 180]); + assert_eq!(r.x_dim(), "longitude"); + assert_eq!(r.y_dim(), "latitude"); + assert_eq!(r.width(), Some(360)); + assert_eq!(r.height(), Some(180)); + } - builder.start_band(indb_band_metadata).unwrap(); - let test_data = vec![1u8; 100]; - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); + #[test] + fn test_zero_band_raster_roundtrip() { + // Zero-band rasters double as "target grid" specifications. They must + // round-trip through the builder cleanly. 
+ let mut builder = RasterBuilder::new(1); + let transform = [10.0, 1.0, 0.0, 20.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[128, 64], Some("EPSG:3857")) + .unwrap(); + builder.finish_raster().unwrap(); - // Test OutDbRef band (should have OutDb fields populated) - let outdb_band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://mybucket/satellite_image.tif".to_string()), - outdb_band_id: Some(2), - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - builder.start_band(outdb_band_metadata).unwrap(); - // For OutDbRef, data field could be empty or contain metadata/thumbnail - builder.band_data_writer().append_value([]); + assert_eq!(r.num_bands(), 0); + assert_eq!(r.spatial_dims(), vec!["x", "y"]); + assert_eq!(r.spatial_shape(), &[128, 64]); + assert_eq!(r.width(), Some(128)); + assert_eq!(r.height(), Some(64)); + assert_eq!(r.crs(), Some("EPSG:3857")); + } + + #[test] + fn test_band_missing_spatial_dim_errors() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); + // Band is missing "y" entirely. 
+ builder + .start_band(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 4]); builder.finish_band().unwrap(); - builder.finish_raster().unwrap(); - let raster_array = builder.finish().unwrap(); - - // Verify the band metadata - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 2); - - // Test InDb band - let indb_band = bands.band(1).unwrap(); - let indb_metadata = indb_band.metadata(); - assert_eq!(indb_metadata.storage_type().unwrap(), StorageType::InDb); - assert_eq!(indb_metadata.data_type().unwrap(), BandDataType::UInt8); - assert!(indb_metadata.outdb_url().is_none()); - assert!(indb_metadata.outdb_band_id().is_none()); - assert_eq!(indb_band.data().len(), 100); - - // Test OutDbRef band - let outdb_band = bands.band(2).unwrap(); - let outdb_metadata = outdb_band.metadata(); - assert_eq!( - outdb_metadata.storage_type().unwrap(), - StorageType::OutDbRef + let err = builder.finish_raster().unwrap_err(); + assert!( + err.to_string().contains("missing spatial dimension"), + "unexpected error: {err}" ); - assert_eq!(outdb_metadata.data_type().unwrap(), BandDataType::Float32); - assert_eq!( - outdb_metadata.outdb_url().unwrap(), - "s3://mybucket/satellite_image.tif" + } + + #[test] + fn test_start_band_rejects_zero_dim() { + // 0-D bands carry no spatial extent and no caller has a use for + // them. start_band must reject an empty dim_names slice eagerly so + // the malformed band never reaches the buffer layer. 
+ let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, &[], &[], None).unwrap(); + let err = builder + .start_band(None, &[], &[], BandDataType::UInt8, None, None, None) + .unwrap_err(); + assert!( + err.to_string().contains("0-dimensional"), + "unexpected error: {err}" ); - assert_eq!(outdb_metadata.outdb_band_id().unwrap(), 2); - assert_eq!(outdb_band.data().len(), 0); // Empty data for OutDbRef } #[test] - fn test_band_access_errors() { - // Create a simple raster with one band + fn test_contiguous_data_identity_via_start_band_is_borrowed() { + // Canonical identity: the row's view list is null, and the read path + // synthesises the identity view. Should still hand the underlying + // bytes back without copying. + use std::borrow::Cow; + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + let pixels: Vec = (0..6).collect(); + builder.band_data_writer().append_value(pixels.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); - builder.start_raster(&metadata, None).unwrap(); + // Visible shape comes from the synthesised identity view. 
+ assert_eq!(band.shape(), &[2, 3]); + assert_eq!(band.raw_source_shape(), &[2, 3]); - let band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[3, 1]); + assert_eq!(buf.offset, 0); + + let bytes = band.contiguous_data().unwrap(); + assert!(matches!(bytes, Cow::Borrowed(_))); + assert_eq!(&*bytes, pixels.as_slice()); + } + + #[test] + fn test_view_field_is_null_for_identity_band() { + // Schema invariant: identity views are stored as null list rows so + // the canonical "no slice" case costs no Arrow space. Confirm by + // poking the raw column. + use arrow_array::Array; - builder.start_band(band_metadata).unwrap(); - builder.band_data_writer().append_value([1u8; 100]); + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[2, 2], None) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 4]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let raster_array = builder.finish().unwrap(); - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - // Test invalid band number (0-based) - let result = bands.band(0); - assert!(result.is_err()); - let err = result.err().unwrap().to_string(); - assert!(err.contains("band numbers must be 1-based")); - - // Test out of range band number - let result = bands.band(2); - assert!(result.is_err()); - let err = result.err().unwrap().to_string(); - assert!(err.contains("is out of range")); - - // Test valid band number should still work - let result = bands.band(1); - assert!(result.is_ok()); - let band = result.unwrap(); - 
assert_eq!(band.data().len(), 100); + let array = builder.finish().unwrap(); + let bands_list = array + .column(sedona_schema::raster::raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_struct = bands_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let view_list = bands_struct + .column(sedona_schema::raster::band_indices::VIEW) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(view_list.len(), 1); + assert!( + view_list.is_null(0), + "identity-view band should serialise as a null view row" + ); + } + + #[test] + fn test_band_spatial_dim_size_mismatch_errors() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[4, 4], None) + .unwrap(); + // Band has "x" and "y" but x-size disagrees with top-level shape. + builder + .start_band( + None, + &["y", "x"], + &[4, 8], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 32]); + builder.finish_band().unwrap(); + + let err = builder.finish_raster().unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("has size 8") && msg.contains("expected 4"), + "unexpected error: {msg}" + ); + } + + #[test] + fn test_view_null_round_trips_through_arrow_ipc() { + // Schema invariant: a band built via start_band serialises with a + // null view row, and the null must survive an Arrow IPC round-trip. + // If a future change accidentally writes a non-null empty list + // instead, downstream readers (DuckDB, PyArrow, sedona-py) will + // disagree about whether the view is identity. 
+ use arrow_array::RecordBatch; + use arrow_ipc::reader::StreamReader; + use arrow_ipc::writer::StreamWriter; + use arrow_schema::Schema; + use std::io::Cursor; + + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[2, 3], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 6]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let schema = Arc::new(Schema::new(vec![Arc::new(arrow_schema::Field::new( + "raster", + array.data_type().clone(), + true, + )) as arrow_schema::FieldRef])); + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array.clone())]).unwrap(); + + let mut buf: Vec = Vec::new(); + { + let mut writer = StreamWriter::try_new(&mut buf, schema.as_ref()).unwrap(); + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + + let cursor = Cursor::new(buf); + let reader = StreamReader::try_new(cursor, None).unwrap(); + let batches: Vec<_> = reader.collect::, _>>().unwrap(); + assert_eq!(batches.len(), 1); + let restored_struct = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + let bands_list = restored_struct + .column(sedona_schema::raster::raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_struct = bands_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let view_list = bands_struct + .column(sedona_schema::raster::band_indices::VIEW) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(view_list.len(), 1); + assert!( + view_list.is_null(0), + "identity-view band must remain a null view row after IPC round-trip" + ); + + let rasters = RasterStructArray::new(restored_struct); + let r0 = rasters.get(0).unwrap(); + assert_eq!(r0.band(0).unwrap().shape(), &[2, 3]); } } diff 
--git a/rust/sedona-raster/src/display.rs b/rust/sedona-raster/src/display.rs index 400658a0a..0e2f79062 100644 --- a/rust/sedona-raster/src/display.rs +++ b/rust/sedona-raster/src/display.rs @@ -19,7 +19,6 @@ use std::fmt; use crate::affine_transformation::to_world_coordinate; use crate::traits::RasterRef; -use sedona_schema::raster::StorageType; /// Wrapper for formatting a raster reference as a human-readable string. /// @@ -57,12 +56,10 @@ pub struct RasterDisplay<'a>(pub &'a dyn RasterRef); impl fmt::Display for RasterDisplay<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let raster = self.0; - let metadata = raster.metadata(); - let bands = raster.bands(); - let width = metadata.width(); - let height = metadata.height(); - let nbands = bands.len(); + let width = raster.width().unwrap_or(0); + let height = raster.height().unwrap_or(0); + let nbands = raster.num_bands(); // Compute axis-aligned bounding box from 4 corners in world coordinates. // This handles both skewed and non-skewed rasters correctly. 
@@ -78,13 +75,12 @@ impl fmt::Display for RasterDisplay<'_> { let ymin = uly.min(ury).min(lry).min(lly); let ymax = uly.max(ury).max(lry).max(lly); - let skew_x = metadata.skew_x(); - let skew_y = metadata.skew_y(); + let t = raster.transform(); + let skew_x = t[2]; + let skew_y = t[4]; let has_skew = skew_x != 0.0 || skew_y != 0.0; - let has_outdb = bands - .iter() - .any(|band| matches!(band.metadata().storage_type(), Ok(StorageType::OutDbRef))); + let has_outdb = (0..nbands).any(|i| raster.band(i).is_some_and(|b| !b.is_indb())); // Write: [WxH/nbands] @ [xmin ymin xmax ymax] write!( @@ -92,7 +88,7 @@ impl fmt::Display for RasterDisplay<'_> { "[{width}x{height}/{nbands}] @ [{xmin} {ymin} {xmax} {ymax}]" )?; - // Conditionally append skew info when the raster is rotated/skewed + // Conditionally append skew info if has_skew { write!(f, " skew=({skew_x}, {skew_y})")?; } @@ -150,7 +146,6 @@ mod tests { #[test] fn display_write_to_fmt_write() { - // Verify RasterDisplay works with any fmt::Write target (e.g., String) let rasters = generate_test_rasters(1, None).unwrap(); let raster_array = RasterStructArray::new(&rasters); let raster = raster_array.get(0).unwrap(); diff --git a/rust/sedona-testing/src/benchmark_util.rs b/rust/sedona-testing/src/benchmark_util.rs index 56db9dd5b..c23646d15 100644 --- a/rust/sedona-testing/src/benchmark_util.rs +++ b/rust/sedona-testing/src/benchmark_util.rs @@ -970,8 +970,7 @@ mod test { let rasters = RasterStructArray::new(raster_array); assert_eq!(rasters.len(), ROWS_PER_BATCH); let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 5); + assert_eq!(raster.width(), Some(10)); + assert_eq!(raster.height(), Some(5)); } } diff --git a/rust/sedona-testing/src/rasters.rs b/rust/sedona-testing/src/rasters.rs index a29b72f9e..98b8a579b 100644 --- a/rust/sedona-testing/src/rasters.rs +++ b/rust/sedona-testing/src/rasters.rs @@ -19,16 +19,9 @@ use 
datafusion_common::Result; use fastrand::Rng; use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::{BandMetadata, RasterMetadata, RasterRef}; +use sedona_raster::traits::RasterRef; use sedona_schema::crs::lnglat; -use sedona_schema::raster::{BandDataType, StorageType}; - -/// Describes a single in-db band used by test raster builders. -pub struct InDbTestBand { - pub datatype: BandDataType, - pub nodata_value: Option>, - pub data: Vec, -} +use sedona_schema::raster::BandDataType; /// Generate a StructArray of rasters with sequentially increasing dimensions and pixel values /// These tiny rasters are to provide fast, easy and predictable test data for unit tests. @@ -46,27 +39,23 @@ pub fn generate_test_rasters( continue; } - let raster_metadata = RasterMetadata { - width: i as u64 + 1, - height: i as u64 + 2, - upperleft_x: i as f64 + 1.0, - upperleft_y: i as f64 + 2.0, - scale_x: i.max(1) as f64 * 0.1, - scale_y: i.max(1) as f64 * -0.2, - skew_x: i as f64 * 0.03, - skew_y: i as f64 * 0.04, - }; - builder.start_raster(&raster_metadata, Some(&crs))?; - builder.start_band(BandMetadata { - datatype: BandDataType::UInt16, - nodata_value: Some(vec![0u8; 2]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - })?; + let width = i as u64 + 1; + let height = i as u64 + 2; + builder.start_raster_2d( + width, + height, + i as f64 + 1.0, // origin_x + i as f64 + 2.0, // origin_y + i.max(1) as f64 * 0.1, // scale_x + i.max(1) as f64 * -0.2, // scale_y + i as f64 * 0.03, // skew_x + i as f64 * 0.04, // skew_y + Some(&crs), + )?; + builder.start_band_2d(BandDataType::UInt16, Some(&[0u8, 0u8]))?; let pixel_count = (i + 1) * (i + 2); // width * height - let mut band_data = Vec::with_capacity(pixel_count * 2); // 2 bytes per u16 + let mut band_data = Vec::with_capacity(pixel_count * 2); for pixel_value in 0..pixel_count as u16 { band_data.extend_from_slice(&pixel_value.to_le_bytes()); } @@ 
-105,37 +94,26 @@ pub fn generate_tiled_rasters( let origin_x = (tile_x * tile_width) as f64; let origin_y = (tile_y * tile_height) as f64; - let raster_metadata = RasterMetadata { - width: tile_width as u64, - height: tile_height as u64, - upperleft_x: origin_x, - upperleft_y: origin_y, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - raster_builder.start_raster(&raster_metadata, Some(&crs))?; + raster_builder.start_raster_2d( + tile_width as u64, + tile_height as u64, + origin_x, + origin_y, + 1.0, + 1.0, + 0.0, + 0.0, + Some(&crs), + )?; for _ in 0..band_count { // Set a nodata value appropriate for the data type let nodata_value = get_nodata_value_for_type(&data_type); - let nodata_value_bytes = nodata_value.clone(); - let band_metadata = BandMetadata { - nodata_value, - storage_type: StorageType::InDb, - datatype: data_type, - outdb_url: None, - outdb_band_id: None, - }; - - raster_builder.start_band(band_metadata)?; + raster_builder.start_band_2d(data_type, nodata_value.as_deref())?; let pixel_count = tile_width * tile_height; - // Determine which corner position (if any) should have nodata in this tile let corner_position = get_corner_position(tile_x, tile_y, x_tiles, y_tiles, tile_width, tile_height); @@ -162,28 +140,12 @@ pub fn generate_tiled_rasters( /// Useful for testing error handling of inverse affine transforms. 
pub fn build_noninvertible_raster() -> StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 0.0, - scale_y: 0.0, - skew_x: 0.0, - skew_y: 0.0, - }; let crs = lnglat().unwrap().to_crs_string(); builder - .start_raster(&metadata, Some(&crs)) + .start_raster_2d(1, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, Some(&crs)) .expect("start raster"); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_band_2d(BandDataType::UInt8, None) .expect("start band"); builder.band_data_writer().append_value([0u8]); builder.finish_band().expect("finish band"); @@ -191,31 +153,6 @@ pub fn build_noninvertible_raster() -> StructArray { builder.finish().expect("finish") } -/// Builds a single raster with in-db bands from explicit metadata and raw band bytes. -pub fn build_in_db_raster( - metadata: RasterMetadata, - crs: Option<&str>, - bands: &[InDbTestBand], -) -> StructArray { - let mut builder = RasterBuilder::new(1); - builder.start_raster(&metadata, crs).expect("start raster"); - for band in bands { - builder - .start_band(BandMetadata { - datatype: band.datatype, - nodata_value: band.nodata_value.clone(), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) - .expect("start band"); - builder.band_data_writer().append_value(&band.data); - builder.finish_band().expect("finish band"); - } - builder.finish_raster().expect("finish raster"); - builder.finish().expect("finish") -} - /// Builds a single-band raster from raw bytes for tests. 
pub fn raster_from_single_band( width: usize, @@ -224,74 +161,68 @@ pub fn raster_from_single_band( band_bytes: &[u8], crs: Option<&str>, ) -> StructArray { - let metadata = RasterMetadata { - width: width as u64, - height: height as u64, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - build_in_db_raster( - metadata, - crs, - &[InDbTestBand { - datatype: data_type, - nodata_value: None, - data: band_bytes.to_vec(), - }], - ) + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + width as u64, + height as u64, + 0.0, + 0.0, + 1.0, + -1.0, + 0.0, + 0.0, + crs, + ) + .expect("start raster"); + builder.start_band_2d(data_type, None).expect("start band"); + builder.band_data_writer().append_value(band_bytes); + builder.finish_band().expect("finish band"); + builder.finish_raster().expect("finish raster"); + builder.finish().expect("finish") } /// Builds a single raster with 3 bands of different types for testing multi-band operations. /// Band 1: UInt8 (nodata=255), Band 2: UInt16 (nodata=0), Band 3: Float32 (no nodata). /// Each band is 2x2 pixels. 
pub fn generate_multi_band_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); let crs = lnglat().unwrap().to_crs_string(); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 10.0, - upperleft_y: 20.0, - scale_x: 0.5, - scale_y: -0.5, - skew_x: 0.0, - skew_y: 0.0, - }; + builder + .start_raster_2d(2, 2, 10.0, 20.0, 0.5, -0.5, 0.0, 0.0, Some(&crs)) + .unwrap(); + + // Band 1: UInt8, nodata=255 + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder + .band_data_writer() + .append_value([1u8, 2u8, 3u8, 4u8]); + builder.finish_band().unwrap(); + // Band 2: UInt16, nodata=0 + builder + .start_band_2d(BandDataType::UInt16, Some(&[0u8, 0u8])) + .unwrap(); let band2_data: Vec = [100u16, 200u16, 300u16, 400u16] .iter() .flat_map(|v| v.to_le_bytes()) .collect(); + builder.band_data_writer().append_value(&band2_data); + builder.finish_band().unwrap(); + + // Band 3: Float32, no nodata + builder.start_band_2d(BandDataType::Float32, None).unwrap(); let band3_data: Vec = [1.5f32, 2.5f32, 3.5f32, 4.5f32] .iter() .flat_map(|v| v.to_le_bytes()) .collect(); + builder.band_data_writer().append_value(&band3_data); + builder.finish_band().unwrap(); - build_in_db_raster( - metadata, - Some(&crs), - &[ - InDbTestBand { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![255u8]), - data: vec![1u8, 2u8, 3u8, 4u8], - }, - InDbTestBand { - datatype: BandDataType::UInt16, - nodata_value: Some(vec![0u8, 0u8]), - data: band2_data, - }, - InDbTestBand { - datatype: BandDataType::Float32, - nodata_value: None, - data: band3_data, - }, - ], - ) + builder.finish_raster().unwrap(); + builder.finish().unwrap() } /// Determine if this tile contains a corner of the overall grid and return its position @@ -330,8 +261,6 @@ fn generate_random_band_data( corner_position: Option, rng: &mut Rng, ) -> Vec { - /// Generate random band data for a given pixel type and set the corner pixel - /// to the nodata value if applicable. 
macro_rules! gen_band { ($byte_size:expr, $rng_expr:expr) => {{ let byte_size: usize = $byte_size; @@ -399,83 +328,71 @@ pub fn assert_raster_arrays_equal( /// Compare two rasters for equality pub fn assert_raster_equal(raster1: &impl RasterRef, raster2: &impl RasterRef) { // Compare metadata - let meta1 = raster1.metadata(); - let meta2 = raster2.metadata(); - assert_eq!(meta1.width(), meta2.width(), "Raster widths do not match"); - assert_eq!( - meta1.height(), - meta2.height(), - "Raster heights do not match" - ); assert_eq!( - meta1.upper_left_x(), - meta2.upper_left_x(), - "Raster upper left x does not match" + raster1.width(), + raster2.width(), + "Raster widths do not match" ); assert_eq!( - meta1.upper_left_y(), - meta2.upper_left_y(), - "Raster upper left y does not match" + raster1.height(), + raster2.height(), + "Raster heights do not match" ); assert_eq!( - meta1.scale_x(), - meta2.scale_x(), - "Raster scale x does not match" + raster1.transform(), + raster2.transform(), + "Raster transforms do not match" ); assert_eq!( - meta1.scale_y(), - meta2.scale_y(), - "Raster scale y does not match" + raster1.spatial_dims(), + raster2.spatial_dims(), + "Raster spatial_dims do not match" ); assert_eq!( - meta1.skew_x(), - meta2.skew_x(), - "Raster skew x does not match" + raster1.spatial_shape(), + raster2.spatial_shape(), + "Raster spatial_shape does not match" ); assert_eq!( - meta1.skew_y(), - meta2.skew_y(), - "Raster skew y does not match" + raster1.num_bands(), + raster2.num_bands(), + "Number of bands do not match" ); // Compare bands - let bands1 = raster1.bands(); - let bands2 = raster2.bands(); - assert_eq!(bands1.len(), bands2.len(), "Number of bands do not match"); + for band_index in 0..raster1.num_bands() { + let band1 = raster1 + .band(band_index) + .unwrap_or_else(|| panic!("Band {band_index} missing from raster1")); + let band2 = raster2 + .band(band_index) + .unwrap_or_else(|| panic!("Band {band_index} missing from raster2")); - for band_index 
in 0..bands1.len() { - let band1 = bands1.band(band_index + 1).unwrap(); - let band2 = bands2.band(band_index + 1).unwrap(); - - let band_meta1 = band1.metadata(); - let band_meta2 = band2.metadata(); assert_eq!( - band_meta1.data_type().unwrap(), - band_meta2.data_type().unwrap(), - "Band data types do not match" + band1.data_type(), + band2.data_type(), + "Band {band_index} data types do not match" ); assert_eq!( - band_meta1.nodata_value(), - band_meta2.nodata_value(), - "Band nodata values do not match" + band1.nodata(), + band2.nodata(), + "Band {band_index} nodata values do not match" ); assert_eq!( - band_meta1.storage_type().unwrap(), - band_meta2.storage_type().unwrap(), - "Band storage types do not match" + band1.dim_names(), + band2.dim_names(), + "Band {band_index} dim_names do not match" ); assert_eq!( - band_meta1.outdb_url(), - band_meta2.outdb_url(), - "Band outdb URLs do not match" + band1.shape(), + band2.shape(), + "Band {band_index} shapes do not match" ); assert_eq!( - band_meta1.outdb_band_id(), - band_meta2.outdb_band_id(), - "Band outdb band IDs do not match" + band1.contiguous_data().unwrap().as_ref(), + band2.contiguous_data().unwrap().as_ref(), + "Band {band_index} data does not match" ); - - assert_eq!(band1.data(), band2.data(), "Band data does not match"); } } @@ -494,26 +411,23 @@ mod tests { for i in 0..count { let raster = raster_array.get(i).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), i as u64 + 1); - assert_eq!(metadata.height(), i as u64 + 2); - assert_eq!(metadata.upper_left_x(), i as f64 + 1.0); - assert_eq!(metadata.upper_left_y(), i as f64 + 2.0); - assert_eq!(metadata.scale_x(), (i.max(1) as f64) * 0.1); - assert_eq!(metadata.scale_y(), (i.max(1) as f64) * -0.2); - assert_eq!(metadata.skew_x(), (i as f64) * 0.03); - assert_eq!(metadata.skew_y(), (i as f64) * 0.04); - - let bands = raster.bands(); - let band = bands.band(1).unwrap(); - let band_metadata = band.metadata(); - 
assert_eq!(band_metadata.data_type().unwrap(), BandDataType::UInt16); - assert_eq!(band_metadata.nodata_value(), Some(&[0u8, 0u8][..])); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); - assert_eq!(band_metadata.outdb_url(), None); - assert_eq!(band_metadata.outdb_band_id(), None); - - let band_data = band.data(); + assert_eq!(raster.width(), Some(i as u64 + 1)); + assert_eq!(raster.height(), Some(i as u64 + 2)); + + let t = raster.transform(); + assert_eq!(t[0], i as f64 + 1.0); // origin_x + assert_eq!(t[3], i as f64 + 2.0); // origin_y + assert_eq!(t[1], (i.max(1) as f64) * 0.1); // scale_x + assert_eq!(t[5], (i.max(1) as f64) * -0.2); // scale_y + assert_eq!(t[2], (i as f64) * 0.03); // skew_x + assert_eq!(t[4], (i as f64) * 0.04); // skew_y + + assert_eq!(raster.num_bands(), 1); + let band = raster.band(0).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt16); + assert_eq!(band.nodata(), Some(&[0u8, 0u8][..])); + + let band_data = band.contiguous_data().unwrap(); let expected_pixel_count = (i + 1) * (i + 2); // width * height // Convert raw bytes back to u16 values for comparison @@ -538,29 +452,49 @@ mod tests { assert_eq!(raster_array.len(), 16); // 4x4 tiles for i in 0..16 { let raster = raster_array.get(i).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 64); - assert_eq!(metadata.height(), 64); - assert_eq!(metadata.upper_left_x(), ((i % 4) * 64) as f64); - assert_eq!(metadata.upper_left_y(), ((i / 4) * 64) as f64); - let bands = raster.bands(); - assert_eq!(bands.len(), 3); + assert_eq!(raster.width(), Some(64)); + assert_eq!(raster.height(), Some(64)); + let t = raster.transform(); + assert_eq!(t[0], ((i % 4) * 64) as f64); // origin_x + assert_eq!(t[3], ((i / 4) * 64) as f64); // origin_y + assert_eq!(raster.num_bands(), 3); for band_index in 0..3 { - let band = bands.band(band_index + 1).unwrap(); - let band_metadata = band.metadata(); - assert_eq!(band_metadata.data_type().unwrap(), 
BandDataType::UInt8); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); - let band_data = band.data(); - assert_eq!(band_data.len(), 64 * 64); // 4096 pixels + let band = raster.band(band_index).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.contiguous_data().unwrap().len(), 64 * 64); // 4096 pixels } } } + #[test] + fn test_generate_multi_band_raster() { + let struct_array = generate_multi_band_raster(); + let raster_array = RasterStructArray::new(&struct_array); + assert_eq!(raster_array.len(), 1); + + let raster = raster_array.get(0).unwrap(); + assert_eq!(raster.width(), Some(2)); + assert_eq!(raster.height(), Some(2)); + assert_eq!(raster.num_bands(), 3); + + let b1 = raster.band(0).unwrap(); + assert_eq!(b1.data_type(), BandDataType::UInt8); + assert_eq!(b1.nodata(), Some(&[255u8][..])); + assert_eq!(b1.contiguous_data().unwrap().as_ref(), &[1u8, 2, 3, 4]); + + let b2 = raster.band(1).unwrap(); + assert_eq!(b2.data_type(), BandDataType::UInt16); + assert_eq!(b2.nodata(), Some(&[0u8, 0][..])); + + let b3 = raster.band(2).unwrap(); + assert_eq!(b3.data_type(), BandDataType::Float32); + assert_eq!(b3.nodata(), None); + } + #[test] fn test_raster_arrays_equal() { let raster_array1 = generate_test_rasters(3, None).unwrap(); let raster_struct_array1 = RasterStructArray::new(&raster_array1); - // Test that identical arrays are equal assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array1); } @@ -569,8 +503,6 @@ mod tests { fn test_raster_arrays_not_equal() { let raster_array1 = generate_test_rasters(3, None).unwrap(); let raster_struct_array1 = RasterStructArray::new(&raster_array1); - - // Test that arrays with different lengths are not equal let raster_array2 = generate_test_rasters(4, None).unwrap(); let raster_struct_array2 = RasterStructArray::new(&raster_array2); assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array2); @@ -580,65 +512,33 @@ mod tests { fn 
test_raster_equal() { let raster_array1 = generate_tiled_rasters((256, 256), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); - let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); - - // Assert that the rasters are equal to themselves + let rsa = RasterStructArray::new(&raster_array1); + let raster1 = rsa.get(0).unwrap(); assert_raster_equal(&raster1, &raster1); } #[test] - #[should_panic = "Band data does not match"] + #[should_panic = "Band 0 data does not match"] fn test_raster_different_band_data() { let raster_array1 = generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); let raster_array2 = generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(47)).unwrap(); - - let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); - let raster2 = RasterStructArray::new(&raster_array2).get(0).unwrap(); + let rsa1 = RasterStructArray::new(&raster_array1); + let rsa2 = RasterStructArray::new(&raster_array2); + let raster1 = rsa1.get(0).unwrap(); + let raster2 = rsa2.get(0).unwrap(); assert_raster_equal(&raster1, &raster2); } #[test] - fn test_generate_multi_band_raster() { - let struct_array = generate_multi_band_raster(); - let raster_array = RasterStructArray::new(&struct_array); - assert_eq!(raster_array.len(), 1); - - let raster = raster_array.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 2); - assert_eq!(metadata.height(), 2); - assert_eq!(metadata.upper_left_x(), 10.0); - assert_eq!(metadata.upper_left_y(), 20.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 3); - - // Band 1: UInt8, nodata=255 - let b1 = bands.band(1).unwrap(); - assert_eq!(b1.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(b1.metadata().nodata_value(), Some(&[255u8][..])); - assert_eq!(b1.data(), &[1u8, 2, 3, 4]); - - // Band 2: UInt16, nodata=0 - let b2 = bands.band(2).unwrap(); - assert_eq!(b2.metadata().data_type().unwrap(), BandDataType::UInt16); - 
assert_eq!(b2.metadata().nodata_value(), Some(&[0u8, 0][..])); - - // Band 3: Float32, no nodata - let b3 = bands.band(3).unwrap(); - assert_eq!(b3.metadata().data_type().unwrap(), BandDataType::Float32); - assert_eq!(b3.metadata().nodata_value(), None); - } - - #[test] - #[should_panic = "Raster upper left x does not match"] + #[should_panic = "Raster transforms do not match"] fn test_raster_different_metadata() { let raster_array = generate_tiled_rasters((128, 128), (2, 1), BandDataType::UInt8, Some(43)).unwrap(); - let raster1 = RasterStructArray::new(&raster_array).get(0).unwrap(); - let raster2 = RasterStructArray::new(&raster_array).get(1).unwrap(); + let rsa = RasterStructArray::new(&raster_array); + let raster1 = rsa.get(0).unwrap(); + let raster2 = rsa.get(1).unwrap(); assert_raster_equal(&raster1, &raster2); } } From 7a1c7e7fcbd494902f01598cc659849e26a40db7 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Mon, 4 May 2026 16:09:02 -0700 Subject: [PATCH 4/6] feat(raster-gdal): port loader to canonical N-D schema Reads outdb_uri + parse_outdb_source instead of #787's storage_type / outdb_url / outdb_band_id triplet. Each GDAL-backed SQL function gates on BandRef::is_2d() at entry and returns an Execution error on N-D input. VSI normalization, the dataset cache, and RasterIO bodies are byte-for-byte unchanged from #787 - only the schema-read sites move. In-db reads use BandRef::contiguous_data() and require Cow::Borrowed so MEM datasets can point at the StructArray's backing buffer without copying; for is_2d identity views this always holds. Tests rebuilt to use RasterBuilder directly. 
Adds an N-D rejection test for raster_ref_to_gdal_mem and the VRT path, plus an end-to-end --- rust/sedona-raster-gdal/src/gdal_common.rs | 366 ++++++----- .../src/gdal_dataset_provider.rs | 585 +++++++++++------- rust/sedona-raster-gdal/src/lib.rs | 5 - rust/sedona-raster-gdal/src/utils.rs | 474 -------------- 4 files changed, 547 insertions(+), 883 deletions(-) delete mode 100644 rust/sedona-raster-gdal/src/utils.rs diff --git a/rust/sedona-raster-gdal/src/gdal_common.rs b/rust/sedona-raster-gdal/src/gdal_common.rs index 0c96fd1cb..8e5295964 100644 --- a/rust/sedona-raster-gdal/src/gdal_common.rs +++ b/rust/sedona-raster-gdal/src/gdal_common.rs @@ -19,13 +19,14 @@ use sedona_gdal::dataset::Dataset; use sedona_gdal::errors::GdalError; use sedona_gdal::gdal::Gdal; use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY, GDAL_OF_VERBOSE_ERROR}; -use sedona_gdal::geo_transform::GeoTransform; use sedona_gdal::mem::MemDatasetBuilder; use sedona_gdal::raster::types::DatasetOptions; use sedona_gdal::raster::types::GdalDataType; -use sedona_raster::traits::{MetadataRef, RasterMetadata, RasterRef}; -use sedona_schema::raster::{BandDataType, StorageType}; +use std::borrow::Cow; + +use sedona_raster::traits::{BandRef, RasterRef}; +use sedona_schema::raster::BandDataType; use datafusion_common::{ arrow_datafusion_err, exec_datafusion_err, exec_err, DataFusionError, Result, @@ -43,47 +44,6 @@ where } } -/// Convert raster metadata into GDAL's six-element geo-transform. -/// -/// GDAL stores geo-transforms as -/// `[origin_x, pixel_width, rotation_x, origin_y, rotation_y, pixel_height]`. 
-pub(crate) trait ToGdalGeoTransform { - fn to_gdal_geotransform(&self) -> GeoTransform; -} - -impl ToGdalGeoTransform for T { - fn to_gdal_geotransform(&self) -> GeoTransform { - [ - self.upper_left_x(), - self.scale_x(), - self.skew_x(), - self.upper_left_y(), - self.skew_y(), - self.scale_y(), - ] - } -} - -/// Reconstruct raster metadata from a GDAL six-element geo-transform and raster dimensions. -pub(crate) trait RasterMetadataFromGdalGeoTransform { - fn to_raster_metadata(&self, width: usize, height: usize) -> RasterMetadata; -} - -impl RasterMetadataFromGdalGeoTransform for GeoTransform { - fn to_raster_metadata(&self, width: usize, height: usize) -> RasterMetadata { - RasterMetadata { - width: width as u64, - height: height as u64, - upperleft_x: self[0], - upperleft_y: self[3], - scale_x: self[1], - scale_y: self[5], - skew_x: self[2], - skew_y: self[4], - } - } -} - /// Converts a BandDataType to the corresponding GDAL data type. pub fn band_data_type_to_gdal(band_type: &BandDataType) -> GdalDataType { match band_type { @@ -182,6 +142,25 @@ pub(crate) fn convert_gdal_err(e: GdalError) -> DataFusionError { DataFusionError::External(Box::new(e)) } +/// Resolve a 1-based band index into a `BandRef`, translating to the trait's +/// 0-based API and producing a clear error when out of range or zero. The +/// 1-based convention is preserved at the public boundary because callers +/// already construct band-index lists that way. +fn resolve_band( + raster: &R, + one_based_index: usize, +) -> Result> { + if one_based_index == 0 { + return exec_err!("Band index must be 1-based; got 0"); + } + raster.band(one_based_index - 1).ok_or_else(|| { + exec_datafusion_err!( + "Band index {one_based_index} is out of range (raster has {} bands)", + raster.num_bands() + ) + }) +} + /// This function creates a GDAL dataset backed by the MEM driver that directly /// references the band data stored in the [RasterRef]. 
No data copying occurs - /// the GDAL bands point to the same memory as the data buffer held by [RasterRef]. @@ -195,6 +174,7 @@ pub(crate) fn convert_gdal_err(e: GdalError) -> DataFusionError { /// /// # Errors /// Returns an error if: +/// - Any band is N-D (not the legacy `["y","x"]` 2-D shape with identity view) /// - Any band uses OutDb storage /// - GDAL driver operations fail /// - Accessing RasterRef fails @@ -203,11 +183,14 @@ pub unsafe fn raster_ref_to_gdal_mem( raster: &R, band_indices: &[usize], ) -> Result { - let metadata = raster.metadata(); - let bands = raster.bands(); - - let width = metadata.width() as usize; - let height = metadata.height() as usize; + let width = raster + .width() + .ok_or_else(|| exec_datafusion_err!("Raster has no width (spatial_shape missing)"))? + as usize; + let height = raster + .height() + .ok_or_else(|| exec_datafusion_err!("Raster has no height (spatial_shape missing)"))? + as usize; // Create internal MEM dataset via sedona-gdal shim to avoid open dataset list contention. let mut mem_ds_builder = MemDatasetBuilder::new(width, height); @@ -218,21 +201,39 @@ pub unsafe fn raster_ref_to_gdal_mem( // is sequential (1..=band_indices.len()), even if the source band indices are // sparse (e.g. [1, 3]). for &src_band_index in band_indices.iter() { - let band = bands - .band(src_band_index) - .map_err(|e| arrow_datafusion_err!(e))?; + let band = resolve_band(raster, src_band_index)?; - if band.metadata().storage_type()? 
!= StorageType::InDb { + if !band.is_2d() { + return exec_err!( + "GDAL backend requires a 2-dim band; got dim_names={:?}", + band.dim_names() + ); + } + + if !band.is_indb() { return Err(DataFusionError::NotImplemented( "OutDb bands are not supported in raster_to_mem_dataset".to_string(), )); } - let band_metadata = band.metadata(); - let band_type = band_metadata.data_type()?; + let band_type = band.data_type(); let gdal_type = band_data_type_to_gdal(&band_type); - let band_data = band.data(); - let data_ptr = band_data.as_ptr(); + // contiguous_data() is Cow::Borrowed for is_2d identity views; the + // borrow points at the StructArray's backing buffer, which outlives + // the dataset (held by the caller). For Cow::Owned the pointer would + // dangle the moment the Cow drops, so we reject that case loudly. + let band_data = band + .contiguous_data() + .map_err(|e| arrow_datafusion_err!(e))?; + let bytes: &[u8] = match &band_data { + Cow::Borrowed(b) => b, + Cow::Owned(_) => { + return exec_err!( + "Internal: contiguous_data must be borrowed for is_2d bands; got owned" + ); + } + }; + let data_ptr = bytes.as_ptr(); unsafe { mem_ds_builder = mem_ds_builder.add_band(gdal_type, data_ptr as *mut u8); } @@ -244,7 +245,12 @@ pub unsafe fn raster_ref_to_gdal_mem( .map_err(|e| DataFusionError::External(Box::new(e)))? 
}; - let geotransform = metadata.to_gdal_geotransform(); + let geotransform: [f64; 6] = raster.transform().try_into().map_err(|_| { + exec_datafusion_err!( + "Raster transform must be exactly 6 elements; got {}", + raster.transform().len() + ) + })?; dataset .set_geo_transform(&geotransform) @@ -257,12 +263,9 @@ pub unsafe fn raster_ref_to_gdal_mem( for (dst_band_index, &src_band_index) in band_indices.iter().enumerate() { let dst_band_index = dst_band_index + 1; - let band = bands - .band(src_band_index) - .map_err(|e| arrow_datafusion_err!(e))?; - let band_metadata = band.metadata(); - let band_type = band_metadata.data_type()?; - if let Some(nodata_bytes) = band_metadata.nodata_value() { + let band = resolve_band(raster, src_band_index)?; + let band_type = band.data_type(); + if let Some(nodata_bytes) = band.nodata() { let raster_band = dataset .rasterband(dst_band_index) .map_err(convert_gdal_err)?; @@ -428,16 +431,73 @@ fn strip_scheme_prefix<'a>(value: &'a str, scheme_prefix: &str) -> Option<&'a st mod tests { use super::*; + use arrow_array::StructArray; use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; - use sedona_schema::raster::StorageType; - use sedona_testing::rasters::{build_in_db_raster, InDbTestBand}; - fn single_raster<'a>( - raster_array: &'a arrow_array::StructArray, - ) -> impl sedona_raster::traits::RasterRef + 'a { - RasterStructArray::new(raster_array).get(0).unwrap() + /// Affine: (origin_x, scale_x, skew_x, origin_y, skew_y, scale_y) — same + /// element order as the canonical 6-element transform. + type Transform2d = (f64, f64, f64, f64, f64, f64); + + /// 2D in-db test band: `(data_type, nodata_bytes_or_none, raw_pixel_bytes)`. 
+ type InDbTestBand2d = (BandDataType, Option>, Vec); + + fn build_indb_raster_2d( + width: u64, + height: u64, + transform: Transform2d, + crs: Option<&str>, + bands: &[InDbTestBand2d], + ) -> StructArray { + let (origin_x, scale_x, skew_x, origin_y, skew_y, scale_y) = transform; + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + width, height, origin_x, origin_y, scale_x, scale_y, skew_x, skew_y, crs, + ) + .unwrap(); + for (data_type, nodata, data) in bands { + builder + .start_band_2d(*data_type, nodata.as_deref()) + .unwrap(); + builder.band_data_writer().append_value(data); + builder.finish_band().unwrap(); + } + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + fn build_outdb_raster_2d( + width: u64, + height: u64, + transform: Transform2d, + crs: Option<&str>, + outdb_uri: &str, + data_type: BandDataType, + nodata: Option<&[u8]>, + ) -> StructArray { + let (origin_x, scale_x, skew_x, origin_y, skew_y, scale_y) = transform; + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + width, height, origin_x, origin_y, scale_x, scale_y, skew_x, skew_y, crs, + ) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[height, width], + data_type, + nodata, + Some(outdb_uri), + Some("geotiff"), + ) + .unwrap(); + builder.band_data_writer().append_value([]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() } fn read_band_u64(dataset: &Dataset, band_index: usize, size: (usize, usize)) -> Vec { @@ -458,40 +518,6 @@ mod tests { .contains("AUTHORITY[\"EPSG\",\"4326\"]")); } - #[test] - fn test_to_gdal_geotransform() { - let metadata = RasterMetadata { - width: 3, - height: 2, - upperleft_x: 10.0, - upperleft_y: 20.0, - scale_x: 0.5, - scale_y: -0.5, - skew_x: 0.1, - skew_y: -0.2, - }; - - assert_eq!( - metadata.to_gdal_geotransform(), - [10.0, 0.5, 0.1, 20.0, -0.2, -0.5] - ); - } - - #[test] - fn test_to_raster_metadata() { - let geotransform: 
GeoTransform = [12.5, 0.25, 0.75, -8.0, -0.5, -2.0]; - let metadata = geotransform.to_raster_metadata(4, 3); - - assert_eq!(metadata.width, 4); - assert_eq!(metadata.height, 3); - assert_eq!(metadata.upperleft_x, 12.5); - assert_eq!(metadata.upperleft_y, -8.0); - assert_eq!(metadata.scale_x, 0.25); - assert_eq!(metadata.scale_y, -2.0); - assert_eq!(metadata.skew_x, 0.75); - assert_eq!(metadata.skew_y, -0.5); - } - #[test] fn test_band_data_type_to_gdal() { assert_eq!( @@ -696,18 +722,15 @@ mod tests { #[test] fn test_raster_ref_to_gdal_empty_preserves_metadata_and_crs() { - let metadata = RasterMetadata { - width: 3, - height: 2, - upperleft_x: 10.0, - upperleft_y: 20.0, - scale_x: 0.5, - scale_y: -0.5, - skew_x: 0.1, - skew_y: -0.2, - }; - let raster_array = build_in_db_raster(metadata, Some("EPSG:4326"), &[]); - let raster = single_raster(&raster_array); + let raster_array = build_indb_raster_2d( + 3, + 2, + (10.0, 0.5, 0.1, 20.0, -0.2, -0.5), + Some("EPSG:4326"), + &[], + ); + let raster_struct_array = RasterStructArray::new(&raster_array); + let raster = raster_struct_array.get(0).unwrap(); with_gdal(|gdal| { let dataset = raster_ref_to_gdal_empty(gdal, &raster)?; @@ -725,16 +748,6 @@ mod tests { #[test] fn test_raster_ref_to_gdal_mem_preserves_band_order_data_and_nodata() { - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 5.0, - upperleft_y: 8.0, - scale_x: 2.0, - scale_y: -2.0, - skew_x: 0.0, - skew_y: 0.0, - }; let uint64_pixels = [1u64, 2, 3, 4] .into_iter() .flat_map(u64::to_le_bytes) @@ -746,28 +759,27 @@ mod tests { .collect::>(); let uint64_nodata = 9_007_199_254_740_992u64; let int64_nodata = -9_007_199_254_740_992i64; - let raster_array = build_in_db_raster( - metadata, + let raster_array = build_indb_raster_2d( + 2, + 2, + (5.0, 2.0, 0.0, 8.0, 0.0, -2.0), Some("EPSG:4326"), &[ - InDbTestBand { - datatype: BandDataType::UInt64, - nodata_value: Some(uint64_nodata.to_le_bytes().to_vec()), - data: uint64_pixels, - }, - 
InDbTestBand { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![255u8]), - data: uint8_pixels, - }, - InDbTestBand { - datatype: BandDataType::Int64, - nodata_value: Some(int64_nodata.to_le_bytes().to_vec()), - data: int64_pixels, - }, + ( + BandDataType::UInt64, + Some(uint64_nodata.to_le_bytes().to_vec()), + uint64_pixels, + ), + (BandDataType::UInt8, Some(vec![255u8]), uint8_pixels), + ( + BandDataType::Int64, + Some(int64_nodata.to_le_bytes().to_vec()), + int64_pixels, + ), ], ); - let raster = single_raster(&raster_array); + let raster_struct_array = RasterStructArray::new(&raster_array); + let raster = raster_struct_array.get(0).unwrap(); with_gdal(|gdal| { let dataset = unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[3, 1])? }; @@ -793,36 +805,58 @@ mod tests { #[test] fn test_raster_ref_to_gdal_mem_rejects_outdb_bands() { + let raster_array = build_outdb_raster_2d( + 1, + 1, + (0.0, 1.0, 0.0, 1.0, 0.0, -1.0), + None, + "/tmp/test.tif", + BandDataType::UInt8, + Some(&[0u8]), + ); + let raster_struct_array = RasterStructArray::new(&raster_array); + let raster = raster_struct_array.get(0).unwrap(); + + let err = with_gdal(|gdal| unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[1]) }) + .err() + .unwrap(); + assert!(err.to_string().contains("OutDb bands are not supported")); + } + + #[test] + fn test_raster_ref_to_gdal_mem_rejects_nd_bands() { + // Build a 3-D in-db band shaped ["time","y","x"] over a 2-D raster. + // The N-D guard should fire before any GDAL call. 
let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 1.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, None).unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![0u8]), - storage_type: StorageType::OutDbRef, - outdb_url: Some("/tmp/test.tif".to_string()), - outdb_band_id: Some(1), - }) + .start_raster_2d(2, 2, 0.0, 2.0, 1.0, -1.0, 0.0, 0.0, None) .unwrap(); - builder.band_data_writer().append_value([]); + builder + .start_band( + None, + &["time", "y", "x"], + &[3, 2, 2], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 3 * 2 * 2]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); let raster_array = builder.finish().unwrap(); - let raster = single_raster(&raster_array); + let raster_struct_array = RasterStructArray::new(&raster_array); + let raster = raster_struct_array.get(0).unwrap(); let err = with_gdal(|gdal| unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[1]) }) .err() .unwrap(); - assert!(err.to_string().contains("OutDb bands are not supported")); + assert!( + err.to_string().contains("requires a 2-dim band"), + "got: {err}" + ); } } diff --git a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs index 03e45621e..4fb774d70 100644 --- a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs +++ b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs @@ -19,9 +19,7 @@ use std::convert::TryInto; use std::{cell::RefCell, marker::PhantomData, num::NonZeroUsize, rc::Rc}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{ - arrow_datafusion_err, exec_datafusion_err, exec_err, DataFusionError, Result, -}; +use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError, Result}; use 
sedona_gdal::dataset::Dataset; use sedona_gdal::gdal::Gdal; @@ -30,12 +28,13 @@ use sedona_gdal::raster::types::GdalDataType; use sedona_common::SedonaOptions; use sedona_raster::traits::RasterRef; -use sedona_schema::raster::{BandDataType, StorageType}; +use sedona_schema::raster::BandDataType; use crate::gdal_common::{ band_data_type_to_gdal, bytes_to_f64, convert_gdal_err, normalize_outdb_source_path, - open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem, ToGdalGeoTransform, + open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem, }; +use crate::source_uri::parse_outdb_source; /// A GDAL dataset constructed from a `RasterRef`. /// @@ -211,43 +210,50 @@ impl GDALDatasetCache { raster: &R, gdal_mem_source: Option<&Rc>, ) -> Result<(Rc, Vec>)> { - let metadata = raster.metadata(); - let bands = raster.bands(); - let num_bands = bands.len(); - - let metadata_width = metadata.width(); - let metadata_height = metadata.height(); - let width: i32 = metadata_width.try_into().map_err(|_| { + let num_bands = raster.num_bands(); + + let raster_width = raster + .width() + .ok_or_else(|| exec_datafusion_err!("Raster has no width (spatial_shape missing)"))?; + let raster_height = raster + .height() + .ok_or_else(|| exec_datafusion_err!("Raster has no height (spatial_shape missing)"))?; + let width: i32 = raster_width.try_into().map_err(|_| { exec_datafusion_err!( "Raster width {} exceeds supported GDAL/i32 limit {}", - metadata_width, + raster_width, i32::MAX ) })?; - let height: i32 = metadata_height.try_into().map_err(|_| { + let height: i32 = raster_height.try_into().map_err(|_| { exec_datafusion_err!( "Raster height {} exceeds supported GDAL/i32 limit {}", - metadata_height, + raster_height, i32::MAX ) })?; - let vrt_width: usize = metadata_width.try_into().map_err(|_| { + let vrt_width: usize = raster_width.try_into().map_err(|_| { exec_datafusion_err!( "Raster width {} exceeds supported GDAL/usize limit", - metadata_width + raster_width ) 
})?; - let vrt_height: usize = metadata_height.try_into().map_err(|_| { + let vrt_height: usize = raster_height.try_into().map_err(|_| { exec_datafusion_err!( "Raster height {} exceeds supported GDAL/usize limit", - metadata_height + raster_height ) })?; let mut vrt = gdal .create_vrt(vrt_width, vrt_height) .map_err(convert_gdal_err)?; - let geotransform = metadata.to_gdal_geotransform(); + let geotransform: [f64; 6] = raster.transform().try_into().map_err(|_| { + exec_datafusion_err!( + "Raster transform must be exactly 6 elements; got {}", + raster.transform().len() + ) + })?; vrt.set_geo_transform(&geotransform) .map_err(convert_gdal_err)?; if let Some(crs) = raster.crs() { @@ -257,10 +263,19 @@ impl GDALDatasetCache { let mut outdb_sources: Vec> = Vec::new(); let mut mem_band_index: usize = 1; - for i in 1..=num_bands { - let band = bands.band(i).map_err(|e| arrow_datafusion_err!(e))?; - let band_metadata = band.metadata(); - let band_type = band_metadata.data_type()?; + for i in 0..num_bands { + let band = raster + .band(i) + .ok_or_else(|| exec_datafusion_err!("Band index {} is out of range", i))?; + + if !band.is_2d() { + return exec_err!( + "GDAL backend requires 2-dim bands; got dim_names={:?}", + band.dim_names() + ); + } + + let band_type = band.data_type(); let gdal_type = band_data_type_to_gdal(&band_type); if matches!(gdal_type, GdalDataType::Unknown) { return Err(DataFusionError::NotImplemented(format!( @@ -269,10 +284,12 @@ impl GDALDatasetCache { ))); } + // VRT bands are 1-based. 
+ let vrt_band_num = i + 1; vrt.add_band(gdal_type, None).map_err(convert_gdal_err)?; - let vrt_band = vrt.rasterband(i).map_err(convert_gdal_err)?; + let vrt_band = vrt.rasterband(vrt_band_num).map_err(convert_gdal_err)?; - if let Some(nodata_bytes) = band_metadata.nodata_value() { + if let Some(nodata_bytes) = band.nodata() { match band_type { BandDataType::UInt64 => { let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| { @@ -301,76 +318,68 @@ impl GDALDatasetCache { } } - match band_metadata.storage_type()? { - StorageType::OutDbRef => { - let url = band_metadata.outdb_url().ok_or_else(|| { - exec_datafusion_err!("Band {} is out-db but missing outdb_url", i) - })?; - let source_band_num: usize = band_metadata - .outdb_band_id() - .ok_or_else(|| { - exec_datafusion_err!("Band {} is out-db but missing band_id", i) - })? - .try_into() - .map_err(|_| { - exec_datafusion_err!("Band {} out-db band_id is too large", i) - })?; - - let source_dataset = self.get_or_create_outdb_source(gdal, url, None)?; - - // If GDALGetGeoTransform(hdsSrc, ogt) fails, we fall back to (0, 1, 0, 0, 0, -1), - // which is the identity transform. - let src_geo_transform = source_dataset - .geo_transform() - .unwrap_or([0.0, 1.0, 0.0, 0.0, 0.0, -1.0]); - let (src_w, src_h) = source_dataset.raster_size(); - - // Compute source and destination windows for the VRT simple source. The VRT usually only - // clip a small portion of the source dataset. - let Some((src_window, dst_window)) = compute_vrt_simple_source_windows( - &geotransform, - (width, height), - &src_geo_transform, - (src_w as i32, src_h as i32), - )? - else { - // No spatial overlap between the target raster and the source dataset. - // Leave the VRT band as nodata. 
- continue; - }; - - let source_band = source_dataset - .rasterband(source_band_num) - .map_err(convert_gdal_err)?; - - vrt_band - // Avoid passing per-source NODATA to VRT simple sources; some GDAL builds - // warn that NODATA isn't supported for neighbour-sampled simple sources - // on virtual datasources. We set band-level NODATA via set_no_data_value. - .add_simple_source(&source_band, src_window, dst_window, None, None) - .map_err(convert_gdal_err)?; - - outdb_sources.push(source_dataset); - } - StorageType::InDb => { - let mem_dataset = gdal_mem_source - .as_ref() - .expect("in-db dataset should exist"); - let source_band = mem_dataset - .rasterband(mem_band_index) - .map_err(convert_gdal_err)?; - mem_band_index += 1; - - vrt_band - .add_simple_source( - &source_band, - (0, 0, width, height), - (0, 0, width, height), - None, - None, - ) - .map_err(convert_gdal_err)?; - } + if band.is_indb() { + let mem_dataset = gdal_mem_source + .as_ref() + .expect("in-db dataset should exist"); + let source_band = mem_dataset + .rasterband(mem_band_index) + .map_err(convert_gdal_err)?; + mem_band_index += 1; + + vrt_band + .add_simple_source( + &source_band, + (0, 0, width, height), + (0, 0, width, height), + None, + None, + ) + .map_err(convert_gdal_err)?; + } else { + let uri = band.outdb_uri().ok_or_else(|| { + exec_datafusion_err!("Band {} has empty data and no outdb_uri", vrt_band_num) + })?; + let (gdal_uri, source_band_num_u32) = parse_outdb_source(uri)?; + let source_band_num: usize = source_band_num_u32.try_into().map_err(|_| { + exec_datafusion_err!("Band {} out-db band index is too large", vrt_band_num) + })?; + + let source_dataset = self.get_or_create_outdb_source(gdal, &gdal_uri, None)?; + + // If GDALGetGeoTransform(hdsSrc, ogt) fails, we fall back to (0, 1, 0, 0, 0, -1), + // which is the identity transform. 
+ let src_geo_transform = source_dataset + .geo_transform() + .unwrap_or([0.0, 1.0, 0.0, 0.0, 0.0, -1.0]); + let (src_w, src_h) = source_dataset.raster_size(); + + // Compute source and destination windows for the VRT simple source. The VRT usually only + // clip a small portion of the source dataset. + let Some((src_window, dst_window)) = compute_vrt_simple_source_windows( + &geotransform, + (width, height), + &src_geo_transform, + (src_w as i32, src_h as i32), + )? + else { + // No spatial overlap between the target raster and the source dataset. + // Leave the VRT band as nodata. + continue; + }; + + let source_band = source_dataset + .rasterband(source_band_num) + .map_err(convert_gdal_err)?; + + vrt_band + // Avoid passing per-source NODATA to VRT simple sources; some GDAL builds + // warn that NODATA isn't supported for neighbour-sampled simple sources + // on virtual datasources. We set band-level NODATA via set_no_data_value. + .add_simple_source(&source_band, src_window, dst_window, None, None) + .map_err(convert_gdal_err)?; + + outdb_sources.push(source_dataset); } } @@ -393,8 +402,7 @@ impl<'a> GDALDatasetProvider<'a> { &self, raster: &'b R, ) -> Result> { - let bands = raster.bands(); - let num_bands = bands.len(); + let num_bands = raster.num_bands(); if num_bands == 0 { let dataset = raster_ref_to_gdal_empty(self.gdal, raster)?; @@ -408,11 +416,15 @@ impl<'a> GDALDatasetProvider<'a> { let mut indb_band_indices = Vec::with_capacity(num_bands); let mut has_outdb = false; - for i in 1..=num_bands { - let band = bands.band(i).map_err(|e| arrow_datafusion_err!(e))?; - match band.metadata().storage_type()? { - StorageType::InDb => indb_band_indices.push(i), - StorageType::OutDbRef => has_outdb = true, + for i in 0..num_bands { + let band = raster + .band(i) + .ok_or_else(|| exec_datafusion_err!("Band index {} is out of range", i))?; + if band.is_indb() { + // raster_ref_to_gdal_mem expects 1-based source band indices. 
+ indb_band_indices.push(i + 1); + } else { + has_outdb = true; } } @@ -479,10 +491,12 @@ impl<'a> GDALDatasetProvider<'a> { #[derive(Hash, Eq, PartialEq)] struct VrtBandKey { - storage_type: StorageType, data_type: BandDataType, nodata_bits: Option, + /// Normalized GDAL-side URI, with the SedonaDB `#band=N` fragment stripped. + /// `None` for in-db bands (paired with `outdb_band_id == None`). outdb_url: Option, + /// 1-based source band index parsed from `outdb_uri`. `None` for in-db bands. outdb_band_id: Option, } @@ -497,19 +511,28 @@ struct VrtKey { impl VrtKey { fn from_raster(raster: &R) -> Result { - let metadata = raster.metadata(); - let bands = raster.bands(); - let num_bands = bands.len(); - - let geotransform = metadata.to_gdal_geotransform(); + let num_bands = raster.num_bands(); + let width = raster + .width() + .ok_or_else(|| exec_datafusion_err!("Raster has no width (spatial_shape missing)"))?; + let height = raster + .height() + .ok_or_else(|| exec_datafusion_err!("Raster has no height (spatial_shape missing)"))?; + let geotransform: [f64; 6] = raster.transform().try_into().map_err(|_| { + exec_datafusion_err!( + "Raster transform must be exactly 6 elements; got {}", + raster.transform().len() + ) + })?; let geotransform_bits = geotransform.map(f64::to_bits); let mut band_keys = Vec::with_capacity(num_bands); - for i in 1..=num_bands { - let band = bands.band(i).map_err(|e| arrow_datafusion_err!(e))?; - let band_metadata = band.metadata(); - let band_type = band_metadata.data_type()?; - let nodata_bits = match (band_metadata.nodata_value(), band_type) { + for i in 0..num_bands { + let band = raster + .band(i) + .ok_or_else(|| exec_datafusion_err!("Band index {} is out of range", i))?; + let band_type = band.data_type(); + let nodata_bits = match (band.nodata(), band_type) { (Some(bytes), BandDataType::UInt64) => { let bytes: [u8; 8] = bytes.try_into().map_err(|_| { exec_datafusion_err!("Invalid nodata byte length for UInt64") @@ -525,18 +548,26 @@ 
impl VrtKey { (Some(bytes), _) => Some(bytes_to_f64(bytes, &band_type)?.to_bits()), (None, _) => None, }; + let (outdb_url, outdb_band_id) = if band.is_indb() { + (None, None) + } else { + let uri = band.outdb_uri().ok_or_else(|| { + exec_datafusion_err!("Band {} has empty data and no outdb_uri", i + 1) + })?; + let (gdal_uri, band_id) = parse_outdb_source(uri)?; + (Some(normalize_outdb_source_path(&gdal_uri)), Some(band_id)) + }; band_keys.push(VrtBandKey { - storage_type: band_metadata.storage_type()?, data_type: band_type, nodata_bits, - outdb_url: band_metadata.outdb_url().map(normalize_outdb_source_path), - outdb_band_id: band_metadata.outdb_band_id(), + outdb_url, + outdb_band_id, }); } Ok(Self { - width: metadata.width(), - height: metadata.height(), + width, + height, geotransform_bits, crs: raster.crs().map(|s| s.to_string()), bands: band_keys, @@ -630,9 +661,7 @@ mod tests { use sedona_gdal::raster::types::Buffer; use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; - use sedona_schema::raster::{BandDataType, StorageType}; - use sedona_testing::rasters::{build_in_db_raster, InDbTestBand}; + use sedona_schema::raster::BandDataType; use tempfile::TempDir; use crate::gdal_common::with_gdal; @@ -660,73 +689,97 @@ mod tests { path_str } - fn build_outdb_raster(path: &str) -> arrow_array::StructArray { + /// Two-band GeoTIFF on disk: band 1 is filled with `band1_fill`, band 2 + /// with `band2_fill`. Used to exercise `#band=2` selection end-to-end. 
+ fn create_two_band_source_tiff(temp_dir: &TempDir, band1_fill: u8, band2_fill: u8) -> String { + let path = temp_dir.path().join("two_band.tif"); + let path_str = path.to_string_lossy().to_string(); + + with_gdal(|gdal| { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver + .create_with_band_type::(&path_str, 8, 8, 2) + .unwrap(); + dataset + .set_geo_transform(&[0.0, 1.0, 0.0, 8.0, 0.0, -1.0]) + .unwrap(); + for (i, fill) in [band1_fill, band2_fill].iter().enumerate() { + let band = dataset.rasterband(i + 1).unwrap(); + let mut buffer = Buffer::new((8, 8), vec![*fill; 8 * 8]); + band.write((0, 0), (8, 8), &mut buffer).unwrap(); + } + Ok(()) + }) + .unwrap(); + + path_str + } + + type IndbBandSpec = (BandDataType, Option>, Vec); + + fn build_indb_raster_8x8(crs: Option<&str>, bands: &[IndbBandSpec]) -> StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 8, - height: 8, - upperleft_x: 0.0, - upperleft_y: 8.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, None).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![0u8]), - storage_type: StorageType::OutDbRef, - datatype: BandDataType::UInt8, - outdb_url: Some(path.to_string()), - outdb_band_id: Some(1), - }; - builder.start_band(band_metadata).unwrap(); + builder + .start_raster_2d(8, 8, 0.0, 8.0, 1.0, -1.0, 0.0, 0.0, crs) + .unwrap(); + for (data_type, nodata, data) in bands { + builder + .start_band_2d(*data_type, nodata.as_deref()) + .unwrap(); + builder.band_data_writer().append_value(data); + builder.finish_band().unwrap(); + } + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + fn build_outdb_raster(uri: &str) -> StructArray { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(8, 8, 0.0, 8.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[8, 8], + BandDataType::UInt8, + 
Some(&[0u8]), + Some(uri), + Some("geotiff"), + ) + .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - builder.finish().unwrap() } - fn build_mixed_raster(path: &str) -> StructArray { - let metadata = RasterMetadata { - width: 8, - height: 8, - upperleft_x: 0.0, - upperleft_y: 8.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + fn build_mixed_raster(uri: &str) -> StructArray { let mut builder = RasterBuilder::new(1); - builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); + builder + .start_raster_2d(8, 8, 0.0, 8.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) + .unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) .unwrap(); builder.band_data_writer().append_value(vec![7u8; 8 * 8]); builder.finish_band().unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![0u8]), - storage_type: StorageType::OutDbRef, - outdb_url: Some(path.to_string()), - outdb_band_id: Some(1), - }) + .start_band( + None, + &["y", "x"], + &[8, 8], + BandDataType::UInt8, + Some(&[0u8]), + Some(uri), + Some("geotiff"), + ) .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); - builder.finish_raster().unwrap(); builder.finish().unwrap() } @@ -831,17 +884,12 @@ mod tests { #[test] fn test_provider_returns_empty_dataset_for_zero_band_raster() { - let metadata = RasterMetadata { - width: 3, - height: 2, - upperleft_x: 1.0, - upperleft_y: 4.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - let raster_struct = build_in_db_raster(metadata, Some("EPSG:4326"), &[]); + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(3, 2, 1.0, 4.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) + .unwrap(); + 
builder.finish_raster().unwrap(); + let raster_struct = builder.finish().unwrap(); let raster_array = RasterStructArray::new(&raster_struct); let raster = raster_array.get(0).unwrap(); let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); @@ -859,34 +907,26 @@ mod tests { #[test] fn test_provider_returns_mem_dataset_for_indb_raster() { - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 2.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - let band1 = vec![11u8, 12u8, 13u8, 14u8]; - let band2 = vec![21u8, 22u8, 23u8, 24u8]; - let raster_struct = build_in_db_raster( - metadata, - Some("EPSG:4326"), - &[ - InDbTestBand { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![255u8]), - data: band1, - }, - InDbTestBand { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![0u8]), - data: band2, - }, - ], - ); + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(2, 2, 0.0, 2.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) + .unwrap(); + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![11u8, 12u8, 13u8, 14u8]); + builder.finish_band().unwrap(); + builder + .start_band_2d(BandDataType::UInt8, Some(&[0u8])) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![21u8, 22u8, 23u8, 24u8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let raster_struct = builder.finish().unwrap(); let raster_array = RasterStructArray::new(&raster_struct); let raster = raster_array.get(0).unwrap(); let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); @@ -940,34 +980,22 @@ mod tests { #[test] fn test_vrt_key_distinguishes_lossless_uint64_nodata() { - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 1.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - let raster_a = build_in_db_raster( - metadata.clone(), - None, 
- &[InDbTestBand { - datatype: BandDataType::UInt64, - nodata_value: Some((9_007_199_254_740_992u64).to_le_bytes().to_vec()), - data: 1u64.to_le_bytes().to_vec(), - }], - ); - let raster_b = build_in_db_raster( - metadata, - None, - &[InDbTestBand { - datatype: BandDataType::UInt64, - nodata_value: Some((9_007_199_254_740_993u64).to_le_bytes().to_vec()), - data: 1u64.to_le_bytes().to_vec(), - }], - ); + fn one_band_uint64(nodata: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(1, 1, 0.0, 1.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder + .start_band_2d(BandDataType::UInt64, Some(&nodata.to_le_bytes())) + .unwrap(); + builder.band_data_writer().append_value(1u64.to_le_bytes()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + let raster_a = one_band_uint64(9_007_199_254_740_992u64); + let raster_b = one_band_uint64(9_007_199_254_740_993u64); let key_a = super::VrtKey::from_raster(&RasterStructArray::new(&raster_a).get(0).unwrap()).unwrap(); @@ -976,4 +1004,85 @@ mod tests { assert!(key_a != key_b); } + + #[test] + fn test_provider_rejects_nd_band_in_vrt_path() { + let temp_dir = TempDir::new().unwrap(); + let path = create_source_tiff(&temp_dir); + + // Build a raster mixing one in-db 3-D band (forces N-D rejection inside + // build_vrt_from_sources) with one out-db band. 
+ let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(8, 8, 0.0, 8.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder + .start_band( + None, + &["time", "y", "x"], + &[2, 8, 8], + BandDataType::UInt8, + None, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 2 * 8 * 8]); + builder.finish_band().unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[8, 8], + BandDataType::UInt8, + Some(&[0u8]), + Some(&path), + Some("geotiff"), + ) + .unwrap(); + builder.band_data_writer().append_value([]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let raster_struct = builder.finish().unwrap(); + let raster_array = RasterStructArray::new(&raster_struct); + let raster = raster_array.get(0).unwrap(); + let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); + + let err = with_gdal(|gdal| { + let provider = GDALDatasetProvider::new(gdal, Rc::clone(&cache)); + provider.raster_ref_to_gdal(&raster) + }) + .err() + .unwrap(); + assert!(err.to_string().contains("2-dim band"), "got: {err}"); + } + + #[test] + fn test_provider_selects_outdb_band_via_band_fragment() { + let temp_dir = TempDir::new().unwrap(); + // Source TIFF: band 1 filled with 7s, band 2 filled with 99s. + let path = create_two_band_source_tiff(&temp_dir, 7u8, 99u8); + // SedonaDB convention #band=2 → second source band. 
+ let uri = format!("{path}#band=2"); + let raster_struct = build_outdb_raster(&uri); + let raster_array = RasterStructArray::new(&raster_struct); + let raster = raster_array.get(0).unwrap(); + let cache = Rc::new(GDALDatasetCache::try_new(4, 4).unwrap()); + + let dataset = with_gdal(|gdal| { + let provider = GDALDatasetProvider::new(gdal, Rc::clone(&cache)); + provider.raster_ref_to_gdal(&raster) + }) + .unwrap(); + + let band = dataset + .as_dataset() + .rasterband(1) + .unwrap() + .read_as::((0, 0), (8, 8), (8, 8), None) + .unwrap(); + assert_eq!(band.data().to_vec(), vec![99u8; 8 * 8]); + } } diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 8e8c871fb..60c156989 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -31,10 +31,6 @@ mod gdal_common; // Temporary until https://github.com/apache/sedona-db/issues/804 is resolved. #[allow(dead_code)] mod gdal_dataset_provider; - -mod utils; - -#[cfg(test)] mod source_uri; // Re-export main dataset conversion functions @@ -42,4 +38,3 @@ pub use gdal_common::{ band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size, nodata_bytes_to_f64, nodata_f64_to_bytes, }; -pub use utils::{append_as_indb_raster, dataset_to_indb_raster}; diff --git a/rust/sedona-raster-gdal/src/utils.rs b/rust/sedona-raster-gdal/src/utils.rs deleted file mode 100644 index 30f543071..000000000 --- a/rust/sedona-raster-gdal/src/utils.rs +++ /dev/null @@ -1,474 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utility functions for loading raster data via GDAL. - -use arrow_array::StructArray; -use arrow_buffer::Buffer; -use datafusion_common::error::Result; -use datafusion_common::exec_datafusion_err; -use sedona_gdal::dataset::Dataset; -use sedona_gdal::spatial_ref::SpatialRef; - -use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::BandMetadata; -use sedona_schema::raster::{BandDataType, StorageType}; - -use crate::gdal_common::{gdal_to_band_data_type, RasterMetadataFromGdalGeoTransform}; - -/// Append a GDAL dataset as a single in-db raster to the provided [`RasterBuilder`]. 
-pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> Result<()> { - let (width, height) = dataset.raster_size(); - - let geotransform = dataset - .geo_transform() - .map_err(|e| exec_datafusion_err!("Failed to get geotransform: {}", e))?; - - let metadata = geotransform.to_raster_metadata(width, height); - - let crs = dataset - .spatial_ref() - .ok() - .and_then(|sr: SpatialRef| sr.to_projjson().ok()); - - builder - .start_raster(&metadata, crs.as_deref()) - .map_err(|e| exec_datafusion_err!("Failed to start raster: {}", e))?; - - let band_count = dataset.raster_count(); - for band_idx in 1..=band_count { - let band = dataset - .rasterband(band_idx) - .map_err(|e| exec_datafusion_err!("Failed to get band {}: {}", band_idx, e))?; - - let gdal_type = band.band_type(); - let band_data_type = gdal_to_band_data_type(gdal_type) - .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; - - // Get nodata value - let nodata_bytes = match band_data_type { - BandDataType::UInt64 => band - .no_data_value_u64() - .map(|no_data| no_data.to_le_bytes().to_vec()), - BandDataType::Int64 => band - .no_data_value_i64() - .map(|no_data| no_data.to_le_bytes().to_vec()), - _ => band - .no_data_value() - .map(|no_data| crate::gdal_common::nodata_f64_to_bytes(no_data, &band_data_type)), - }; - - let band_metadata = BandMetadata { - nodata_value: nodata_bytes, - storage_type: StorageType::InDb, - datatype: band_data_type, - outdb_url: None, - outdb_band_id: None, - }; - - builder - .start_band(band_metadata) - .map_err(|e| exec_datafusion_err!("Failed to start band: {}", e))?; - - let band_data = band - .read_as_bytes((0, 0), (width, height), (width, height), None) - .map_err(|e| exec_datafusion_err!("Failed to read band {} data: {}", band_idx, e))?; - let band_data_len = u32::try_from(band_data.len()) - .map_err(|_| exec_datafusion_err!("Band {} data too large for Arrow view", band_idx))?; - let block = builder - .band_data_writer() - 
.append_block(Buffer::from_vec(band_data)); - builder - .band_data_writer() - .try_append_view(block, 0, band_data_len) - .map_err(|e| exec_datafusion_err!("Failed to append band {} data: {}", band_idx, e))?; - - builder - .finish_band() - .map_err(|e| exec_datafusion_err!("Failed to finish band: {}", e))?; - } - - builder - .finish_raster() - .map_err(|e| exec_datafusion_err!("Failed to finish raster: {}", e))?; - - Ok(()) -} - -/// Materialize a single GDAL dataset as an in-db raster `StructArray`. -pub fn dataset_to_indb_raster(dataset: &Dataset) -> Result { - let mut builder = RasterBuilder::new(1); - append_as_indb_raster(dataset, &mut builder)?; - - builder - .finish() - .map_err(|e| exec_datafusion_err!("Failed to build raster: {}", e)) -} - -#[cfg(test)] -mod tests { - use super::{append_as_indb_raster, dataset_to_indb_raster}; - - use arrow_array::StructArray; - use datafusion_common::exec_datafusion_err; - use sedona_gdal::dataset::Dataset; - use sedona_gdal::gdal::Gdal; - use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY}; - use sedona_gdal::raster::types::Buffer; - use sedona_gdal::raster::types::DatasetOptions; - use sedona_raster::array::RasterStructArray; - use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::RasterRef; - use sedona_schema::raster::{BandDataType, StorageType}; - use tempfile::TempDir; - - use crate::gdal_common::with_gdal; - - fn open_dataset(gdal: &Gdal, path: &str) -> sedona_gdal::errors::Result { - gdal.open_ex_with_options( - path, - DatasetOptions { - open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY, - ..Default::default() - }, - ) - } - - fn load_as_indb_raster(gdal: &Gdal, path: &str) -> datafusion_common::Result { - let dataset = open_dataset(gdal, path).map_err(crate::gdal_common::convert_gdal_err)?; - dataset_to_indb_raster(&dataset) - } - - fn write_uint64_tiff(gdal: &Gdal, path: &str, nodata: u64, data: Vec) { - let driver = gdal.get_driver_by_name("GTiff").unwrap(); - let dataset = 
driver.create_with_band_type::(path, 2, 2, 1).unwrap(); - dataset - .set_geo_transform(&[100.0, 2.0, 0.0, 200.0, 0.0, -2.0]) - .unwrap(); - dataset.set_projection("EPSG:4326").unwrap(); - let band = dataset.rasterband(1).unwrap(); - band.set_no_data_value_u64(Some(nodata)).unwrap(); - let mut buffer = Buffer::new((2, 2), data); - band.write((0, 0), (2, 2), &mut buffer).unwrap(); - } - - fn write_int64_tiff(gdal: &Gdal, path: &str, nodata: i64, data: Vec) { - let driver = gdal.get_driver_by_name("GTiff").unwrap(); - let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); - dataset - .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) - .unwrap(); - let band = dataset.rasterband(1).unwrap(); - band.set_no_data_value_i64(Some(nodata)).unwrap(); - let mut buffer = Buffer::new((2, 2), data); - band.write((0, 0), (2, 2), &mut buffer).unwrap(); - } - - fn write_uint16_tiff(gdal: &Gdal, path: &str, nodata: u16, data: Vec) { - let driver = gdal.get_driver_by_name("GTiff").unwrap(); - let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); - dataset - .set_geo_transform(&[0.0, 0.5, 0.0, 1.0, 0.0, -0.5]) - .unwrap(); - dataset.set_projection("EPSG:4326").unwrap(); - let band = dataset.rasterband(1).unwrap(); - band.set_no_data_value(Some(nodata as f64)).unwrap(); - let mut buffer = Buffer::new((2, 2), data); - band.write((0, 0), (2, 2), &mut buffer).unwrap(); - } - - fn write_byte_tiff(gdal: &Gdal, path: &str) { - let driver = gdal.get_driver_by_name("GTiff").unwrap(); - let dataset = driver.create_with_band_type::(path, 3, 2, 1).unwrap(); - dataset - .set_geo_transform(&[1.5, 0.25, 0.0, 4.5, 0.0, -0.25]) - .unwrap(); - dataset.set_projection("EPSG:4326").unwrap(); - let band = dataset.rasterband(1).unwrap(); - band.set_no_data_value(Some(255.0)).unwrap(); - let mut buffer = Buffer::new((3, 2), vec![1u8, 2, 3, 4, 5, 6]); - band.write((0, 0), (3, 2), &mut buffer).unwrap(); - } - - fn write_multi_band_tiff(gdal: &Gdal, path: &str) { - let driver = 
gdal.get_driver_by_name("GTiff").unwrap(); - let dataset = driver.create(path, 2, 2, 2).unwrap(); - dataset - .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) - .unwrap(); - - let band1 = dataset.rasterband(1).unwrap(); - // GeoTIFF stores a single dataset-level nodata value, so use the same nodata - // for both bands in this fixture to keep the assertions format-accurate. - band1.set_no_data_value(Some(255.0)).unwrap(); - let mut buffer1 = Buffer::new((2, 2), vec![10u8, 11, 12, 13]); - band1.write((0, 0), (2, 2), &mut buffer1).unwrap(); - - let band2 = dataset.rasterband(2).unwrap(); - band2.set_no_data_value(Some(255.0)).unwrap(); - let mut buffer2 = Buffer::new((2, 2), vec![100u8, 0, 200, 0]); - band2.write((0, 0), (2, 2), &mut buffer2).unwrap(); - } - - fn build_multi_band_mem_dataset(gdal: &Gdal) -> Dataset { - let driver = gdal.get_driver_by_name("MEM").unwrap(); - let dataset = driver.create("", 2, 2, 2).unwrap(); - dataset - .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) - .unwrap(); - dataset.set_projection("EPSG:4326").unwrap(); - - let band1 = dataset.rasterband(1).unwrap(); - band1.set_no_data_value(Some(0.0)).unwrap(); - let mut buffer1 = Buffer::new((2, 2), vec![10u8, 11, 12, 13]); - band1.write((0, 0), (2, 2), &mut buffer1).unwrap(); - - let band2 = dataset.rasterband(2).unwrap(); - band2.set_no_data_value(Some(255.0)).unwrap(); - let mut buffer2 = Buffer::new((2, 2), vec![100u8, 0, 200, 0]); - band2.write((0, 0), (2, 2), &mut buffer2).unwrap(); - - dataset - } - - #[test] - fn dataset_to_indb_raster_reads_single_band_geotiff() { - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("byte.tif"); - let path_str = path.to_string_lossy().to_string(); - - with_gdal(|gdal| { - write_byte_tiff(gdal, &path_str); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap(); - let raster_struct = 
RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - - assert_eq!(raster.metadata().width(), 3); - assert_eq!(raster.metadata().height(), 2); - assert_eq!(raster.metadata().upper_left_x(), 1.5); - assert_eq!(raster.metadata().upper_left_y(), 4.5); - assert!(raster.crs().is_some()); - assert_eq!(band.metadata().storage_type().unwrap(), StorageType::InDb); - assert_eq!(band.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band.metadata().nodata_value().unwrap(), [255u8]); - assert_eq!(band.data(), [1u8, 2, 3, 4, 5, 6]); - } - - #[test] - fn dataset_to_indb_raster_preserves_uint64_nodata_and_data() { - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("uint64.tif"); - let path_str = path.to_string_lossy().to_string(); - let nodata = 9_007_199_254_740_993u64; - - with_gdal(|gdal| { - write_uint64_tiff(gdal, &path_str, nodata, vec![1, 2, 3, 4]); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap(); - let raster_struct = RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - - assert_eq!(raster.metadata().width(), 2); - assert_eq!(raster.metadata().height(), 2); - assert_eq!(raster.metadata().upper_left_x(), 100.0); - assert_eq!(raster.metadata().upper_left_y(), 200.0); - assert_eq!(band.metadata().data_type().unwrap(), BandDataType::UInt64); - assert_eq!( - band.metadata().nodata_value().unwrap(), - &nodata.to_le_bytes() - ); - - let pixels: Vec = band - .data() - .chunks_exact(8) - .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap())) - .collect(); - assert_eq!(pixels, vec![1, 2, 3, 4]); - } - - #[test] - fn dataset_to_indb_raster_preserves_int64_nodata_and_data() { - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("int64.tif"); - let 
path_str = path.to_string_lossy().to_string(); - let nodata = -9_007_199_254_740_993i64; - - with_gdal(|gdal| { - write_int64_tiff(gdal, &path_str, nodata, vec![-1, -2, -3, -4]); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap(); - let raster_struct = RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - - assert_eq!(band.metadata().data_type().unwrap(), BandDataType::Int64); - assert_eq!( - band.metadata().nodata_value().unwrap(), - &nodata.to_le_bytes() - ); - - let pixels: Vec = band - .data() - .chunks_exact(8) - .map(|chunk| i64::from_le_bytes(chunk.try_into().unwrap())) - .collect(); - assert_eq!(pixels, vec![-1, -2, -3, -4]); - } - - #[test] - fn dataset_to_indb_raster_preserves_uint16_nodata_and_data() { - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("uint16.tif"); - let path_str = path.to_string_lossy().to_string(); - let nodata = 513u16; - - with_gdal(|gdal| { - write_uint16_tiff(gdal, &path_str, nodata, vec![1, 256, 511, 1024]); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap(); - let raster_struct = RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - - assert_eq!(band.metadata().data_type().unwrap(), BandDataType::UInt16); - assert_eq!( - band.metadata().nodata_value().unwrap(), - &nodata.to_le_bytes() - ); - - let pixels: Vec = band - .data() - .chunks_exact(2) - .map(|chunk| u16::from_le_bytes(chunk.try_into().unwrap())) - .collect(); - assert_eq!(pixels, vec![1, 256, 511, 1024]); - } - - #[test] - fn dataset_to_indb_raster_preserves_multi_band_data_and_nodata() { - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("multi.tif"); - let 
path_str = path.to_string_lossy().to_string(); - - with_gdal(|gdal| { - write_multi_band_tiff(gdal, &path_str); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap(); - let raster_struct = RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band1 = raster.bands().band(1).unwrap(); - let band2 = raster.bands().band(2).unwrap(); - - assert_eq!(raster.bands().len(), 2); - assert_eq!(band1.metadata().storage_type().unwrap(), StorageType::InDb); - assert_eq!(band1.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band1.metadata().nodata_value().unwrap(), [255u8]); - assert_eq!(band1.data(), [10u8, 11, 12, 13]); - - assert_eq!(band2.metadata().storage_type().unwrap(), StorageType::InDb); - assert_eq!(band2.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band2.metadata().nodata_value().unwrap(), [255u8]); - assert_eq!(band2.data(), [100u8, 0, 200, 0]); - } - - #[test] - fn dataset_to_indb_raster_preserves_per_band_nodata_for_mem_dataset() { - let raster_array = with_gdal(|gdal| { - let dataset = build_multi_band_mem_dataset(gdal); - dataset_to_indb_raster(&dataset) - }) - .unwrap(); - - let raster_struct = RasterStructArray::new(&raster_array); - let raster = raster_struct.get(0).unwrap(); - let band1 = raster.bands().band(1).unwrap(); - let band2 = raster.bands().band(2).unwrap(); - - assert_eq!(raster.bands().len(), 2); - assert_eq!(band1.metadata().storage_type().unwrap(), StorageType::InDb); - assert_eq!(band1.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band1.metadata().nodata_value().unwrap(), [0u8]); - assert_eq!(band1.data(), [10u8, 11, 12, 13]); - - assert_eq!(band2.metadata().storage_type().unwrap(), StorageType::InDb); - assert_eq!(band2.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band2.metadata().nodata_value().unwrap(), [255u8]); - 
assert_eq!(band2.data(), [100u8, 0, 200, 0]); - } - - #[test] - fn append_as_indb_raster_appends_multiple_rasters() { - let temp_dir = TempDir::new().unwrap(); - let byte_path = temp_dir.path().join("byte.tif"); - let byte_path_str = byte_path.to_string_lossy().to_string(); - let multi_path = temp_dir.path().join("multi.tif"); - let multi_path_str = multi_path.to_string_lossy().to_string(); - - with_gdal(|gdal| { - write_byte_tiff(gdal, &byte_path_str); - write_multi_band_tiff(gdal, &multi_path_str); - Ok::<_, datafusion_common::DataFusionError>(()) - }) - .unwrap(); - - let raster_array = with_gdal(|gdal| { - let byte_dataset = - open_dataset(gdal, &byte_path_str).map_err(crate::gdal_common::convert_gdal_err)?; - let multi_dataset = open_dataset(gdal, &multi_path_str) - .map_err(crate::gdal_common::convert_gdal_err)?; - - let mut builder = RasterBuilder::new(2); - append_as_indb_raster(&byte_dataset, &mut builder)?; - append_as_indb_raster(&multi_dataset, &mut builder)?; - builder - .finish() - .map_err(|e| exec_datafusion_err!("Failed to build raster array: {}", e)) - }) - .unwrap(); - - let raster_struct = RasterStructArray::new(&raster_array); - assert_eq!(raster_struct.len(), 2); - - let first = raster_struct.get(0).unwrap(); - assert_eq!(first.metadata().width(), 3); - assert_eq!(first.metadata().height(), 2); - assert_eq!(first.bands().len(), 1); - - let second = raster_struct.get(1).unwrap(); - assert_eq!(second.metadata().width(), 2); - assert_eq!(second.metadata().height(), 2); - assert_eq!(second.bands().len(), 2); - } -} From f8da8e0b07ce4e49be8153c73649ea823624220d Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 6 May 2026 12:58:29 -0700 Subject: [PATCH 5/6] fix(raster-gdal): keep Cow::Owned band bytes alive for GDAL MEM dataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `raster_ref_to_gdal_mem` previously returned a `Result` and guarded against `BandRef::contiguous_data()` returning `Cow::Owned` 
with a runtime tripwire ("Internal: contiguous_data must be borrowed for is_2d bands; got owned"). The check was correct — handing GDAL a pointer into a `Vec<u8>` that drops at the end of the iteration would dangle — but it ties an internal invariant ("`is_2d` ⇒ Borrowed") to incidental properties of today's reader. Any future copy path in the reader (compression, BinaryView block-boundary stitching, alignment fix-up, sliced/broadcast/transposed views from #813 / #750) would detonate the tripwire on perfectly valid 2-D rasters. Change: return `Result<(Dataset, Vec<Vec<u8>>)>`. On `Cow::Borrowed` the GDAL band still points directly at the StructArray buffer (zero-copy). On `Cow::Owned` we move the `Vec<u8>` out of the Cow without copying — the reader's existing materialization is the only allocation — and stash it in the returned vector. The caller (the provider in `gdal_dataset_provider.rs`) parks it in a new `RasterDataset::_owned_band_bytes` field that lives as long as the MEM dataset that holds the pointers. `raster_ref_to_gdal_empty` discards the always-empty vector. --- rust/sedona-raster-gdal/src/gdal_common.rs | 55 ++++++++++++------- .../src/gdal_dataset_provider.rs | 20 +++++-- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/rust/sedona-raster-gdal/src/gdal_common.rs b/rust/sedona-raster-gdal/src/gdal_common.rs index 8e5295964..371e6b2d9 100644 --- a/rust/sedona-raster-gdal/src/gdal_common.rs +++ b/rust/sedona-raster-gdal/src/gdal_common.rs @@ -161,16 +161,23 @@ fn resolve_band( }) } -/// This function creates a GDAL dataset backed by the MEM driver that directly -/// references the band data stored in the [RasterRef]. No data copying occurs - -/// the GDAL bands point to the same memory as the data buffer held by [RasterRef]. +/// Build a GDAL MEM dataset whose bands point at the bytes held by `raster`. +/// +/// Each band's bytes come from `BandRef::contiguous_data()`. 
When that returns +/// `Cow::Borrowed`, the GDAL band points directly at the StructArray's +/// backing buffer (zero-copy); the caller must keep `raster` alive for the +/// dataset's lifetime. When it returns `Cow::Owned` (e.g. a sliced or +/// permuted view materialized by the reader), the moved `Vec` is +/// returned alongside the dataset and the caller must keep it alive too. /// /// # Arguments /// * `raster` - The RasterRef value /// * `band_indices` - The indices of the bands to include in the GDAL dataset (1-based) /// /// # Returns -/// A [`Dataset`] that provides access to the GDAL dataset. +/// A pair `(Dataset, Vec>)`. The second element holds any +/// reader-allocated band bytes that GDAL pointers may reference; it must +/// outlive the dataset. /// /// # Errors /// Returns an error if: @@ -182,7 +189,7 @@ pub unsafe fn raster_ref_to_gdal_mem( gdal: &Gdal, raster: &R, band_indices: &[usize], -) -> Result { +) -> Result<(Dataset, Vec>)> { let width = raster .width() .ok_or_else(|| exec_datafusion_err!("Raster has no width (spatial_shape missing)"))? @@ -195,7 +202,12 @@ pub unsafe fn raster_ref_to_gdal_mem( // Create internal MEM dataset via sedona-gdal shim to avoid open dataset list contention. let mut mem_ds_builder = MemDatasetBuilder::new(width, height); - // Add bands with DATAPOINTER option (zero-copy) + // Reader-allocated band bytes (Cow::Owned). Each entry is moved out of + // the Cow without a copy and must outlive the dataset, since GDAL holds + // a raw pointer into it. + let mut owned_band_bytes: Vec> = Vec::new(); + + // Add bands with DATAPOINTER option. 
// // Note: GDALAddBand always appends a new band, so the destination band index // is sequential (1..=band_indices.len()), even if the source band indices are @@ -218,22 +230,20 @@ pub unsafe fn raster_ref_to_gdal_mem( let band_type = band.data_type(); let gdal_type = band_data_type_to_gdal(&band_type); - // contiguous_data() is Cow::Borrowed for is_2d identity views; the - // borrow points at the StructArray's backing buffer, which outlives - // the dataset (held by the caller). For Cow::Owned the pointer would - // dangle the moment the Cow drops, so we reject that case loudly. let band_data = band .contiguous_data() .map_err(|e| arrow_datafusion_err!(e))?; - let bytes: &[u8] = match &band_data { - Cow::Borrowed(b) => b, - Cow::Owned(_) => { - return exec_err!( - "Internal: contiguous_data must be borrowed for is_2d bands; got owned" - ); + // For Cow::Borrowed the pointer is into the StructArray (caller keeps + // it alive). For Cow::Owned we move the Vec into `owned_band_bytes` + // — no extra copy of the reader's materialization — and point GDAL + // at it; the Vec is kept alive alongside the returned Dataset. + let data_ptr: *const u8 = match band_data { + Cow::Borrowed(b) => b.as_ptr(), + Cow::Owned(v) => { + owned_band_bytes.push(v); + owned_band_bytes.last().unwrap().as_ptr() } }; - let data_ptr = bytes.as_ptr(); unsafe { mem_ds_builder = mem_ds_builder.add_band(gdal_type, data_ptr as *mut u8); } @@ -298,14 +308,17 @@ pub unsafe fn raster_ref_to_gdal_mem( } } - Ok(dataset) + Ok((dataset, owned_band_bytes)) } pub fn raster_ref_to_gdal_empty(gdal: &Gdal, raster: &R) -> Result { unsafe { // SAFETY: raster_ref_to_gdal_mem is safe to call with an empty band list. The - // returned dataset will have zero bands and references no external memory. - raster_ref_to_gdal_mem(gdal, raster, &[]) + // returned dataset has zero bands, references no external memory, and the + // owned-bytes Vec is necessarily empty. 
+ let (dataset, owned) = raster_ref_to_gdal_mem(gdal, raster, &[])?; + debug_assert!(owned.is_empty()); + Ok(dataset) } } @@ -782,7 +795,7 @@ mod tests { let raster = raster_struct_array.get(0).unwrap(); with_gdal(|gdal| { - let dataset = unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[3, 1])? }; + let (dataset, _owned) = unsafe { raster_ref_to_gdal_mem(gdal, &raster, &[3, 1])? }; assert_eq!(dataset.raster_size(), (2, 2)); assert_eq!(dataset.raster_count(), 2); assert_eq!( diff --git a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs index 4fb774d70..9fb8c071c 100644 --- a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs +++ b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs @@ -68,6 +68,11 @@ pub(crate) struct RasterDataset<'a> { _gdal_mem_source: Option>, /// External datasets referenced by the VRT; kept alive for the lifetime of this struct. _gdal_outdb_sources: Vec>, + /// Reader-allocated band bytes that GDAL pointers in the MEM dataset may + /// reference (i.e. bytes returned by `BandRef::contiguous_data()` as + /// `Cow::Owned`, moved here without a copy). Kept alive for as long as + /// the MEM dataset that holds the pointers. + _owned_band_bytes: Vec>, /// Binds this dataset's lifetime to the borrowed source raster. _source_raster: PhantomData<&'a dyn RasterRef>, } @@ -410,6 +415,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset: Rc::new(dataset), _gdal_mem_source: None, _gdal_outdb_sources: Vec::new(), + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -428,12 +434,12 @@ impl<'a> GDALDatasetProvider<'a> { } } - let mut gdal_mem_source = if !indb_band_indices.is_empty() { - Some(Rc::new(unsafe { - raster_ref_to_gdal_mem(self.gdal, raster, &indb_band_indices)? - })) + let (mut gdal_mem_source, owned_band_bytes) = if !indb_band_indices.is_empty() { + let (mem_ds, owned) = + unsafe { raster_ref_to_gdal_mem(self.gdal, raster, &indb_band_indices)? 
}; + (Some(Rc::new(mem_ds)), owned) } else { - None + (None, Vec::new()) }; if !has_outdb { @@ -442,6 +448,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: None, _gdal_outdb_sources: Vec::new(), + _owned_band_bytes: owned_band_bytes, _source_raster: PhantomData, }); } @@ -453,6 +460,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset: Rc::clone(&cached.dataset), _gdal_mem_source: None, _gdal_outdb_sources: cached.outdb_sources.clone(), + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -472,6 +480,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: None, _gdal_outdb_sources: outdb_sources, + _owned_band_bytes: Vec::new(), _source_raster: PhantomData, }); } @@ -484,6 +493,7 @@ impl<'a> GDALDatasetProvider<'a> { dataset, _gdal_mem_source: gdal_mem_source, _gdal_outdb_sources: outdb_sources, + _owned_band_bytes: owned_band_bytes, _source_raster: PhantomData, }) } From fed97eace136f98b47e58138189b0cc146dd6370 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 6 May 2026 12:23:50 -0700 Subject: [PATCH 6/6] feat(raster-gdal): port indb-raster loader utilities to canonical N-D schema Reintroduces `append_as_indb_raster` and `dataset_to_indb_raster` (deleted on the N-D-schema branch because they used the legacy `BandMetadata`/`StorageType`/`band_data_writer` API). Reads are written against the canonical `["y", "x"]` 2-D schema using `start_raster_2d` / `start_band_2d` and `band_data_writer().append_value`. The full test suite (single-band GeoTIFF, uint64 / int64 / uint16 nodata round-trips, multi-band GeoTIFF, per-band MEM nodata, multi-raster append) is ported with assertions rewritten against the new `RasterRef` / `BandRef` accessors and `is_indb()`. 
--- rust/sedona-raster-gdal/src/lib.rs | 2 + rust/sedona-raster-gdal/src/utils.rs | 467 +++++++++++++++++++++++++++ 2 files changed, 469 insertions(+) create mode 100644 rust/sedona-raster-gdal/src/utils.rs diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 60c156989..5daffc85f 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -32,9 +32,11 @@ mod gdal_common; #[allow(dead_code)] mod gdal_dataset_provider; mod source_uri; +mod utils; // Re-export main dataset conversion functions pub use gdal_common::{ band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size, nodata_bytes_to_f64, nodata_f64_to_bytes, }; +pub use utils::{append_as_indb_raster, dataset_to_indb_raster}; diff --git a/rust/sedona-raster-gdal/src/utils.rs b/rust/sedona-raster-gdal/src/utils.rs new file mode 100644 index 000000000..559c0232e --- /dev/null +++ b/rust/sedona-raster-gdal/src/utils.rs @@ -0,0 +1,467 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utility functions for loading raster data via GDAL into the canonical +//! N-D Arrow schema. 
+ +use arrow_array::StructArray; +use datafusion_common::error::Result; +use datafusion_common::exec_datafusion_err; +use sedona_gdal::dataset::Dataset; +use sedona_gdal::spatial_ref::SpatialRef; + +use sedona_raster::builder::RasterBuilder; +use sedona_schema::raster::BandDataType; + +use crate::gdal_common::{gdal_to_band_data_type, nodata_f64_to_bytes}; + +/// Append a GDAL dataset as a single in-db raster to the provided [`RasterBuilder`]. +/// +/// All bands are written as 2-D `["y", "x"]` bands with the canonical identity +/// view. Band data is read from GDAL as a contiguous row-major byte buffer. +pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> Result<()> { + let (width, height) = dataset.raster_size(); + + let geotransform = dataset + .geo_transform() + .map_err(|e| exec_datafusion_err!("Failed to get geotransform: {}", e))?; + let [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y] = geotransform; + + let crs = dataset + .spatial_ref() + .ok() + .and_then(|sr: SpatialRef| sr.to_projjson().ok()); + + let width_u64 = u64::try_from(width) + .map_err(|_| exec_datafusion_err!("Raster width {} does not fit in u64", width))?; + let height_u64 = u64::try_from(height) + .map_err(|_| exec_datafusion_err!("Raster height {} does not fit in u64", height))?; + + builder + .start_raster_2d( + width_u64, + height_u64, + origin_x, + origin_y, + scale_x, + scale_y, + skew_x, + skew_y, + crs.as_deref(), + ) + .map_err(|e| exec_datafusion_err!("Failed to start raster: {}", e))?; + + let band_count = dataset.raster_count(); + for band_idx in 1..=band_count { + let band = dataset + .rasterband(band_idx) + .map_err(|e| exec_datafusion_err!("Failed to get band {}: {}", band_idx, e))?; + + let gdal_type = band.band_type(); + let band_data_type = gdal_to_band_data_type(gdal_type) + .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; + + let nodata_bytes = match band_data_type { + BandDataType::UInt64 => band + 
.no_data_value_u64() + .map(|no_data| no_data.to_le_bytes().to_vec()), + BandDataType::Int64 => band + .no_data_value_i64() + .map(|no_data| no_data.to_le_bytes().to_vec()), + _ => band + .no_data_value() + .map(|no_data| nodata_f64_to_bytes(no_data, &band_data_type)), + }; + + builder + .start_band_2d(band_data_type, nodata_bytes.as_deref()) + .map_err(|e| exec_datafusion_err!("Failed to start band: {}", e))?; + + let band_data = band + .read_as_bytes((0, 0), (width, height), (width, height), None) + .map_err(|e| exec_datafusion_err!("Failed to read band {} data: {}", band_idx, e))?; + builder.band_data_writer().append_value(&band_data); + + builder + .finish_band() + .map_err(|e| exec_datafusion_err!("Failed to finish band: {}", e))?; + } + + builder + .finish_raster() + .map_err(|e| exec_datafusion_err!("Failed to finish raster: {}", e))?; + + Ok(()) +} + +/// Materialize a single GDAL dataset as an in-db raster `StructArray`. +pub fn dataset_to_indb_raster(dataset: &Dataset) -> Result { + let mut builder = RasterBuilder::new(1); + append_as_indb_raster(dataset, &mut builder)?; + + builder + .finish() + .map_err(|e| exec_datafusion_err!("Failed to build raster: {}", e)) +} + +#[cfg(test)] +mod tests { + use super::{append_as_indb_raster, dataset_to_indb_raster}; + + use arrow_array::StructArray; + use datafusion_common::exec_datafusion_err; + use sedona_gdal::dataset::Dataset; + use sedona_gdal::gdal::Gdal; + use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY}; + use sedona_gdal::raster::types::Buffer; + use sedona_gdal::raster::types::DatasetOptions; + use sedona_raster::array::RasterStructArray; + use sedona_raster::builder::RasterBuilder; + use sedona_raster::traits::RasterRef; + use sedona_schema::raster::BandDataType; + use tempfile::TempDir; + + use crate::gdal_common::with_gdal; + + fn open_dataset(gdal: &Gdal, path: &str) -> sedona_gdal::errors::Result { + gdal.open_ex_with_options( + path, + DatasetOptions { + open_flags: 
GDAL_OF_RASTER | GDAL_OF_READONLY, + ..Default::default() + }, + ) + } + + fn load_as_indb_raster(gdal: &Gdal, path: &str) -> datafusion_common::Result { + let dataset = open_dataset(gdal, path).map_err(crate::gdal_common::convert_gdal_err)?; + dataset_to_indb_raster(&dataset) + } + + fn write_uint64_tiff(gdal: &Gdal, path: &str, nodata: u64, data: Vec) { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); + dataset + .set_geo_transform(&[100.0, 2.0, 0.0, 200.0, 0.0, -2.0]) + .unwrap(); + dataset.set_projection("EPSG:4326").unwrap(); + let band = dataset.rasterband(1).unwrap(); + band.set_no_data_value_u64(Some(nodata)).unwrap(); + let mut buffer = Buffer::new((2, 2), data); + band.write((0, 0), (2, 2), &mut buffer).unwrap(); + } + + fn write_int64_tiff(gdal: &Gdal, path: &str, nodata: i64, data: Vec) { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); + dataset + .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) + .unwrap(); + let band = dataset.rasterband(1).unwrap(); + band.set_no_data_value_i64(Some(nodata)).unwrap(); + let mut buffer = Buffer::new((2, 2), data); + band.write((0, 0), (2, 2), &mut buffer).unwrap(); + } + + fn write_uint16_tiff(gdal: &Gdal, path: &str, nodata: u16, data: Vec) { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); + dataset + .set_geo_transform(&[0.0, 0.5, 0.0, 1.0, 0.0, -0.5]) + .unwrap(); + dataset.set_projection("EPSG:4326").unwrap(); + let band = dataset.rasterband(1).unwrap(); + band.set_no_data_value(Some(nodata as f64)).unwrap(); + let mut buffer = Buffer::new((2, 2), data); + band.write((0, 0), (2, 2), &mut buffer).unwrap(); + } + + fn write_byte_tiff(gdal: &Gdal, path: &str) { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver.create_with_band_type::(path, 
3, 2, 1).unwrap(); + dataset + .set_geo_transform(&[1.5, 0.25, 0.0, 4.5, 0.0, -0.25]) + .unwrap(); + dataset.set_projection("EPSG:4326").unwrap(); + let band = dataset.rasterband(1).unwrap(); + band.set_no_data_value(Some(255.0)).unwrap(); + let mut buffer = Buffer::new((3, 2), vec![1u8, 2, 3, 4, 5, 6]); + band.write((0, 0), (3, 2), &mut buffer).unwrap(); + } + + fn write_multi_band_tiff(gdal: &Gdal, path: &str) { + let driver = gdal.get_driver_by_name("GTiff").unwrap(); + let dataset = driver.create(path, 2, 2, 2).unwrap(); + dataset + .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) + .unwrap(); + + let band1 = dataset.rasterband(1).unwrap(); + // GeoTIFF stores a single dataset-level nodata value, so use the same nodata + // for both bands in this fixture to keep the assertions format-accurate. + band1.set_no_data_value(Some(255.0)).unwrap(); + let mut buffer1 = Buffer::new((2, 2), vec![10u8, 11, 12, 13]); + band1.write((0, 0), (2, 2), &mut buffer1).unwrap(); + + let band2 = dataset.rasterband(2).unwrap(); + band2.set_no_data_value(Some(255.0)).unwrap(); + let mut buffer2 = Buffer::new((2, 2), vec![100u8, 0, 200, 0]); + band2.write((0, 0), (2, 2), &mut buffer2).unwrap(); + } + + fn build_multi_band_mem_dataset(gdal: &Gdal) -> Dataset { + let driver = gdal.get_driver_by_name("MEM").unwrap(); + let dataset = driver.create("", 2, 2, 2).unwrap(); + dataset + .set_geo_transform(&[10.0, 1.0, 0.0, 20.0, 0.0, -1.0]) + .unwrap(); + dataset.set_projection("EPSG:4326").unwrap(); + + let band1 = dataset.rasterband(1).unwrap(); + band1.set_no_data_value(Some(0.0)).unwrap(); + let mut buffer1 = Buffer::new((2, 2), vec![10u8, 11, 12, 13]); + band1.write((0, 0), (2, 2), &mut buffer1).unwrap(); + + let band2 = dataset.rasterband(2).unwrap(); + band2.set_no_data_value(Some(255.0)).unwrap(); + let mut buffer2 = Buffer::new((2, 2), vec![100u8, 0, 200, 0]); + band2.write((0, 0), (2, 2), &mut buffer2).unwrap(); + + dataset + } + + #[test] + fn 
dataset_to_indb_raster_reads_single_band_geotiff() {
    let temp_dir = TempDir::new().unwrap();
    let path = temp_dir.path().join("byte.tif");
    let path_str = path.to_string_lossy().to_string();

    with_gdal(|gdal| {
        write_byte_tiff(gdal, &path_str);
        Ok::<_, datafusion_common::DataFusionError>(())
    })
    .unwrap();

    let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap();
    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band = raster.band(0).unwrap();

    assert_eq!(raster.width().unwrap(), 3);
    assert_eq!(raster.height().unwrap(), 2);
    assert_eq!(raster.transform()[0], 1.5);
    assert_eq!(raster.transform()[3], 4.5);
    assert!(raster.crs().is_some());
    assert!(band.is_indb());
    assert_eq!(band.data_type(), BandDataType::UInt8);
    assert_eq!(band.nodata().unwrap(), [255u8]);
    assert_eq!(&*band.contiguous_data().unwrap(), &[1u8, 2, 3, 4, 5, 6]);
}

/// Loads a `UInt64` GeoTIFF written by `write_uint64_tiff` and checks that the
/// raster size, two transform entries, band data type, nodata bytes and pixel
/// values all come back unchanged.
#[test]
fn dataset_to_indb_raster_preserves_uint64_nodata_and_data() {
    let temp_dir = TempDir::new().unwrap();
    let path = temp_dir.path().join("uint64.tif");
    let path_str = path.to_string_lossy().to_string();
    // 2^53 + 1 cannot be represented exactly by an f64 — presumably chosen so a
    // lossy round-trip through a floating-point nodata value would be caught.
    let nodata = 9_007_199_254_740_993u64;

    with_gdal(|gdal| {
        write_uint64_tiff(gdal, &path_str, nodata, vec![1, 2, 3, 4]);
        Ok::<_, datafusion_common::DataFusionError>(())
    })
    .unwrap();

    let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap();
    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band = raster.band(0).unwrap();

    assert_eq!(raster.width().unwrap(), 2);
    assert_eq!(raster.height().unwrap(), 2);
    assert_eq!(raster.transform()[0], 100.0);
    assert_eq!(raster.transform()[3], 200.0);
    assert_eq!(band.data_type(), BandDataType::UInt64);
    assert_eq!(band.nodata().unwrap(), &nodata.to_le_bytes());

    // Decode the band buffer as consecutive 8-byte little-endian values.
    // `chunks_exact(8)` guarantees every chunk converts to `[u8; 8]`.
    let pixels: Vec<u64> = band
        .contiguous_data()
        .unwrap()
        .chunks_exact(8)
        .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
        .collect();
    assert_eq!(pixels, vec![1, 2, 3, 4]);
}

/// Loads an `Int64` GeoTIFF written by `write_int64_tiff` and checks that the
/// band data type, nodata bytes and (negative) pixel values come back unchanged.
#[test]
fn dataset_to_indb_raster_preserves_int64_nodata_and_data() {
    let temp_dir = TempDir::new().unwrap();
    let path = temp_dir.path().join("int64.tif");
    let path_str = path.to_string_lossy().to_string();
    // -(2^53 + 1): like its unsigned counterpart, not exactly representable as
    // an f64 — presumably chosen to expose a lossy nodata conversion.
    let nodata = -9_007_199_254_740_993i64;

    with_gdal(|gdal| {
        write_int64_tiff(gdal, &path_str, nodata, vec![-1, -2, -3, -4]);
        Ok::<_, datafusion_common::DataFusionError>(())
    })
    .unwrap();

    let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap();
    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band = raster.band(0).unwrap();

    assert_eq!(band.data_type(), BandDataType::Int64);
    assert_eq!(band.nodata().unwrap(), &nodata.to_le_bytes());

    // Decode the band buffer as consecutive 8-byte little-endian values.
    let pixels: Vec<i64> = band
        .contiguous_data()
        .unwrap()
        .chunks_exact(8)
        .map(|chunk| i64::from_le_bytes(chunk.try_into().unwrap()))
        .collect();
    assert_eq!(pixels, vec![-1, -2, -3, -4]);
}

/// Loads a `UInt16` GeoTIFF written by `write_uint16_tiff` and checks that the
/// band data type, nodata bytes and pixel values come back unchanged.
#[test]
fn dataset_to_indb_raster_preserves_uint16_nodata_and_data() {
    let temp_dir = TempDir::new().unwrap();
    let path = temp_dir.path().join("uint16.tif");
    let path_str = path.to_string_lossy().to_string();
    // 513 == 0x0201: the two bytes differ, so a byte-order mix-up would change
    // the encoded nodata value.
    let nodata = 513u16;

    with_gdal(|gdal| {
        write_uint16_tiff(gdal, &path_str, nodata, vec![1, 256, 511, 1024]);
        Ok::<_, datafusion_common::DataFusionError>(())
    })
    .unwrap();

    let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap();
    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band = raster.band(0).unwrap();

    assert_eq!(band.data_type(), BandDataType::UInt16);
    assert_eq!(band.nodata().unwrap(), &nodata.to_le_bytes());

    // Decode the band buffer as consecutive 2-byte little-endian values.
    let pixels: Vec<u16> = band
        .contiguous_data()
        .unwrap()
        .chunks_exact(2)
        .map(|chunk| u16::from_le_bytes(chunk.try_into().unwrap()))
        .collect();
    assert_eq!(pixels, vec![1, 256, 511, 1024]);
}

/// Loads the two-band GeoTIFF written by `write_multi_band_tiff` and checks
/// that both bands are in-db `UInt8` bands with nodata `[255]` and the expected
/// pixel bytes.
#[test]
fn dataset_to_indb_raster_preserves_multi_band_data_and_nodata() {
    let temp_dir = TempDir::new().unwrap();
    let path = temp_dir.path().join("multi.tif");
    let path_str = path.to_string_lossy().to_string();

    with_gdal(|gdal| {
        write_multi_band_tiff(gdal, &path_str);
        Ok::<_, datafusion_common::DataFusionError>(())
    })
    .unwrap();

    let raster_array = with_gdal(|gdal| load_as_indb_raster(gdal, &path_str)).unwrap();
    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band1 = raster.band(0).unwrap();
    let band2 = raster.band(1).unwrap();

    assert_eq!(raster.num_bands(), 2);
    assert!(band1.is_indb());
    assert_eq!(band1.data_type(), BandDataType::UInt8);
    assert_eq!(band1.nodata().unwrap(), [255u8]);
    assert_eq!(&*band1.contiguous_data().unwrap(), &[10u8, 11, 12, 13]);

    assert!(band2.is_indb());
    assert_eq!(band2.data_type(), BandDataType::UInt8);
    assert_eq!(band2.nodata().unwrap(), [255u8]);
    assert_eq!(&*band2.contiguous_data().unwrap(), &[100u8, 0, 200, 0]);
}

/// Converts the dataset returned by `build_multi_band_mem_dataset` directly with
/// `dataset_to_indb_raster` (no file round-trip) and checks that each band
/// keeps its own nodata value: `[0]` for the first band, `[255]` for the second.
#[test]
fn dataset_to_indb_raster_preserves_per_band_nodata_for_mem_dataset() {
    let raster_array = with_gdal(|gdal| {
        let dataset = build_multi_band_mem_dataset(gdal);
        dataset_to_indb_raster(&dataset)
    })
    .unwrap();

    let raster_struct = RasterStructArray::new(&raster_array);
    let raster = raster_struct.get(0).unwrap();
    let band1 = raster.band(0).unwrap();
    let band2 = raster.band(1).unwrap();

    assert_eq!(raster.num_bands(), 2);
    assert!(band1.is_indb());
    assert_eq!(band1.data_type(), BandDataType::UInt8);
    assert_eq!(band1.nodata().unwrap(), [0u8]);
    assert_eq!(&*band1.contiguous_data().unwrap(), &[10u8, 11, 12, 13]);

    assert!(band2.is_indb());
    assert_eq!(band2.data_type(), BandDataType::UInt8);
    assert_eq!(band2.nodata().unwrap(), [255u8]);
    assert_eq!(&*band2.contiguous_data().unwrap(), &[100u8, 0, 200, 0]);
}
/// Appends two GDAL datasets to a single `RasterBuilder` via
/// `append_as_indb_raster` and checks that the finished array holds two rows
/// whose width, height and band count match the files they were read from.
#[test]
fn append_as_indb_raster_appends_multiple_rasters() {
    let scratch = TempDir::new().unwrap();
    let single_band_tif = scratch.path().join("byte.tif").to_string_lossy().to_string();
    let two_band_tif = scratch.path().join("multi.tif").to_string_lossy().to_string();

    // Write both fixture files inside one GDAL session.
    let fixtures_written = with_gdal(|gdal| {
        write_byte_tiff(gdal, &single_band_tif);
        write_multi_band_tiff(gdal, &two_band_tif);
        Ok::<_, datafusion_common::DataFusionError>(())
    });
    fixtures_written.unwrap();

    // Open both datasets first, then append them to the builder in order.
    let built = with_gdal(|gdal| {
        let single_band = open_dataset(gdal, &single_band_tif)
            .map_err(crate::gdal_common::convert_gdal_err)?;
        let two_band = open_dataset(gdal, &two_band_tif)
            .map_err(crate::gdal_common::convert_gdal_err)?;

        let mut rasters = RasterBuilder::new(2);
        append_as_indb_raster(&single_band, &mut rasters)?;
        append_as_indb_raster(&two_band, &mut rasters)?;

        let finished = rasters.finish();
        finished.map_err(|e| exec_datafusion_err!("Failed to build raster array: {}", e))
    });
    let raster_array = built.unwrap();

    let rows = RasterStructArray::new(&raster_array);
    assert_eq!(rows.len(), 2);

    // Row 0 comes from "byte.tif".
    let from_byte_tif = rows.get(0).unwrap();
    assert_eq!(from_byte_tif.width().unwrap(), 3);
    assert_eq!(from_byte_tif.height().unwrap(), 2);
    assert_eq!(from_byte_tif.num_bands(), 1);

    // Row 1 comes from "multi.tif".
    let from_multi_tif = rows.get(1).unwrap();
    assert_eq!(from_multi_tif.width().unwrap(), 2);
    assert_eq!(from_multi_tif.height().unwrap(), 2);
    assert_eq!(from_multi_tif.num_bands(), 2);
}
// Closes the enclosing block opened before this chunk (presumably the test module).
}