From 908093c31a79a8447d8a07a7ab80bd7b615808e4 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Mon, 11 May 2026 10:06:17 +0800 Subject: [PATCH 1/8] feat(rust/sedona-raster-gdal): add RS_FromPath --- Cargo.lock | 5 + rust/sedona-raster-gdal/Cargo.toml | 5 + rust/sedona-raster-gdal/src/lib.rs | 6 + rust/sedona-raster-gdal/src/rs_from_path.rs | 316 ++++++++++++++++++++ rust/sedona/src/context.rs | 4 + 5 files changed, 336 insertions(+) create mode 100644 rust/sedona-raster-gdal/src/rs_from_path.rs diff --git a/Cargo.lock b/Cargo.lock index c3bf04d04..faa3ab25d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6128,14 +6128,19 @@ dependencies = [ name = "sedona-raster-gdal" version = "0.4.0" dependencies = [ + "arrow", "arrow-array", "arrow-buffer", + "arrow-schema", "criterion", "datafusion-common", + "datafusion-expr", "lru 0.18.0", "sedona-common", + "sedona-expr", "sedona-gdal", "sedona-raster", + "sedona-raster-functions", "sedona-schema", "sedona-testing", "tempfile", diff --git a/rust/sedona-raster-gdal/Cargo.toml b/rust/sedona-raster-gdal/Cargo.toml index 5dba31c98..ac1fccc30 100644 --- a/rust/sedona-raster-gdal/Cargo.toml +++ b/rust/sedona-raster-gdal/Cargo.toml @@ -31,13 +31,18 @@ rust-version.workspace = true result_large_err = "allow" [dependencies] +arrow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } lru = { workspace = true } sedona-common = { workspace = true } +sedona-expr = { workspace = true } sedona-gdal = { workspace = true } sedona-raster = { workspace = true } +sedona-raster-functions = { workspace = true } sedona-schema = { workspace = true } [dev-dependencies] diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 8e8c871fb..594a7d7eb 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -32,6 +32,7 @@ mod gdal_common; #[allow(dead_code)] mod gdal_dataset_provider; +mod rs_from_path; mod utils; #[cfg(test)] @@ -42,4 +43,9 @@ pub use gdal_common::{ band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size, nodata_bytes_to_f64, nodata_f64_to_bytes, }; +pub use rs_from_path::rs_from_path_udf; pub use utils::{append_as_indb_raster, dataset_to_indb_raster}; + +pub fn all_gdal_udfs() -> Vec { + vec![rs_from_path_udf()] +} diff --git a/rust/sedona-raster-gdal/src/rs_from_path.rs b/rust/sedona-raster-gdal/src/rs_from_path.rs new file mode 100644 index 000000000..0bdcfcb09 --- /dev/null +++ b/rust/sedona-raster-gdal/src/rs_from_path.rs @@ -0,0 +1,316 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! RS_FromPath UDF - Load out-db raster from file path. + +use std::collections::HashMap; +use std::sync::Arc; + +use arrow::compute::cast; +use arrow_array::{Array, ArrayRef, StructArray}; +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_common::exec_datafusion_err; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_gdal::gdal::Gdal; +use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY}; +use sedona_gdal::raster::types::DatasetOptions; +use sedona_gdal::spatial_ref::SpatialRef; +use sedona_raster::builder::RasterBuilder; +use sedona_raster::traits::{BandMetadata, RasterMetadata}; +use sedona_schema::datatypes::{SedonaType, RASTER}; +use sedona_schema::matchers::ArgMatcher; +use sedona_schema::raster::StorageType; + +use crate::gdal_common::{ + gdal_to_band_data_type, nodata_f64_to_bytes, normalize_outdb_source_path, with_gdal, +}; +use crate::gdal_dataset_provider::configure_thread_local_options; + +pub fn rs_from_path_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_frompath", + vec![ + Arc::new(RsFromPath::new(false)), + Arc::new(RsFromPath::new(true)), + ], + Volatility::Volatile, + ) +} + +#[derive(Debug)] +pub(crate) struct RsFromPath { + with_params: bool, +} + +impl RsFromPath { + pub(crate) fn new(with_params: bool) -> Self { + Self { with_params } + } + + #[allow(dead_code)] + fn parse_params(params: &str) -> HashMap { + params + .split(';') + .filter_map(|pair| { + let parts: Vec<&str> = pair.trim().splitn(2, '=').collect(); + if parts.len() == 2 { + Some((parts[0].trim().to_string(), parts[1].trim().to_string())) + } else { + None + } + }) + .collect() + } + + fn load_outdb_raster(gdal: &Gdal, path: &str, _params: Option<&str>) -> Result { + let gdal_path = normalize_outdb_source_path(path); + let dataset = gdal + .open_ex_with_options( + &gdal_path, + DatasetOptions { + open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY, + ..Default::default() + }, + ) + .map_err(|e| { + exec_datafusion_err!( + "Failed to open raster file '{}'(GDAL path '{}'): {}", + path, + gdal_path, + e + ) + })?; + + let (width, height) = dataset.raster_size(); + let geotransform = dataset + .geo_transform() + .map_err(|e| exec_datafusion_err!("Failed to get geotransform: {}", e))?; + + let metadata = RasterMetadata { + width: width as u64, + height: height as u64, + upperleft_x: geotransform[0], + upperleft_y: geotransform[3], + scale_x: geotransform[1], + scale_y: geotransform[5], + skew_x: geotransform[2], + skew_y: geotransform[4], + }; + + let crs = dataset + .spatial_ref() + .ok() + .and_then(|sr: SpatialRef| sr.to_projjson().ok()); + + let mut builder = RasterBuilder::new(1); + builder + .start_raster(&metadata, crs.as_deref()) + .map_err(|e| exec_datafusion_err!("Failed to start raster: {}", e))?; + + let band_count = dataset.raster_count(); + for band_idx in 1..=band_count { + let band = dataset + .rasterband(band_idx) + .map_err(|e| exec_datafusion_err!("Failed to get band {}: {}", band_idx, e))?; + + let gdal_type = band.band_type(); + let band_data_type = gdal_to_band_data_type(gdal_type) + .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; + + let nodata_bytes = band + .no_data_value() + .map(|no_data| nodata_f64_to_bytes(no_data, &band_data_type)); + + let band_metadata = BandMetadata { + nodata_value: nodata_bytes, + storage_type: StorageType::OutDbRef, + datatype: band_data_type, + outdb_url: Some(path.to_string()), + outdb_band_id: Some(band_idx as u32), + }; + + builder + .start_band(band_metadata) + .map_err(|e| exec_datafusion_err!("Failed to start band: {}", e))?; + + builder.band_data_writer().append_value([]); + + builder + .finish_band() + .map_err(|e| exec_datafusion_err!("Failed to finish band: {}", e))?; + } + + builder + .finish_raster() + .map_err(|e| exec_datafusion_err!("Failed to finish raster: {}", e))?; + + builder + .finish() + .map_err(|e| exec_datafusion_err!("Failed to build raster: {}", e)) + } +} + +impl SedonaScalarKernel for RsFromPath { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matchers = if self.with_params { + vec![ArgMatcher::is_string(), ArgMatcher::is_string()] + } else { + vec![ArgMatcher::is_string()] + }; + + let matcher = ArgMatcher::new(matchers, RASTER); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + self.invoke_batch_from_args(arg_types, args, &SedonaType::Arrow(DataType::Null), 0, None) + } + + fn invoke_batch_from_args( + &self, + _arg_types: &[SedonaType], + args: &[ColumnarValue], + _return_type: &SedonaType, + _num_rows: usize, + config_options: Option<&ConfigOptions>, + ) -> Result { + with_gdal(|gdal| { + configure_thread_local_options(gdal, config_options)?; + + let (paths, params_opt) = match &args[0] { + ColumnarValue::Scalar(scalar) => { + let path = scalar.to_array().map_err(|e| { + exec_datafusion_err!("Failed to convert scalar to array: {}", e) + })?; + let params = if self.with_params { + match &args[1] { + ColumnarValue::Scalar(s) => Some(s.to_array().map_err(|e| { + exec_datafusion_err!("Failed to convert params scalar: {}", e) + })?), + ColumnarValue::Array(a) => Some(a.clone()), + } + } else { + None + }; + (path, params) + } + ColumnarValue::Array(array) => { + let params = if self.with_params { + match &args[1] { + ColumnarValue::Scalar(s) => Some(s.to_array().map_err(|e| { + exec_datafusion_err!("Failed to convert params scalar: {}", e) + })?), + ColumnarValue::Array(a) => Some(a.clone()), + } + } else { + None + }; + (array.clone(), params) + } + }; + + let paths = cast(&paths, &DataType::Utf8)?; + let path_array = as_string_array(&paths)?; + + let params_casted = params_opt.map(|p| cast(&p, &DataType::Utf8)).transpose()?; + let params_array = params_casted + .as_ref() + .map(|p| as_string_array(p.as_ref())) + .transpose()?; + + let len = path_array.len(); + if len == 0 { + let builder = RasterBuilder::new(0); + let result = builder + .finish() + .map_err(|e| exec_datafusion_err!("Failed to build empty raster: {}", e))?; + return Ok(ColumnarValue::Array(Arc::new(result))); + } + + let mut combined_arrays: Vec = Vec::with_capacity(len); + for i in 0..len { + if path_array.is_null(i) { + let mut builder = RasterBuilder::new(1); + builder + .append_null() + .map_err(|e| exec_datafusion_err!("Failed to append null: {}", e))?; + let result = builder + .finish() + .map_err(|e| exec_datafusion_err!("Failed to build null raster: {}", e))?; + combined_arrays.push(Arc::new(result)); + } else { + let path = path_array.value(i); + let params = params_array.and_then(|pa| { + if pa.is_null(i) { + None + } else { + Some(pa.value(i)) + } + }); + + let raster = Self::load_outdb_raster(gdal, path, params)?; + combined_arrays.push(Arc::new(raster)); + } + } + + let refs: Vec<&dyn Array> = combined_arrays.iter().map(|a| a.as_ref()).collect(); + let result = arrow::compute::concat(&refs) + .map_err(|e| exec_datafusion_err!("Failed to concatenate rasters: {}", e))?; + + match &args[0] { + ColumnarValue::Scalar(_) => { + let scalar = datafusion_common::ScalarValue::try_from_array(&result, 0)?; + Ok(ColumnarValue::Scalar(scalar)) + } + ColumnarValue::Array(_) => Ok(ColumnarValue::Array(result)), + } + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_params() { + let params = "key1=value1;key2=value2"; + let parsed = RsFromPath::parse_params(params); + assert_eq!(parsed.get("key1"), Some(&"value1".to_string())); + assert_eq!(parsed.get("key2"), Some(&"value2".to_string())); + + let parsed = RsFromPath::parse_params(""); + assert!(parsed.is_empty()); + + let parsed = RsFromPath::parse_params("option=true"); + assert_eq!(parsed.get("option"), Some(&"true".to_string())); + } + + #[test] + fn udf_from_path() { + let udf: datafusion_expr::ScalarUDF = rs_from_path_udf().into(); + assert_eq!(udf.name(), "rs_frompath"); + } +} diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs index eb88e9414..cab44f530 100644 --- a/rust/sedona/src/context.rs +++ b/rust/sedona/src/context.rs @@ -233,6 +233,10 @@ impl SedonaContext { Arc::new(RandomGeometryFunction::default()), ); + for udf in sedona_raster_gdal::all_gdal_udfs() { + out.ctx.register_udf(udf.into()); + } + // Always register default function set out.register_function_set(sedona_functions::register::default_function_set()); From c7decc3a99a1128b7817555777ed92c3cd03b099 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Tue, 12 May 2026 00:10:21 +0800 Subject: [PATCH 2/8] fix(rust/sedona-raster-gdal): align RS_FromPath with the single-path API --- rust/sedona-raster-gdal/Cargo.toml | 5 + .../benches/rs_from_path.rs | 42 ++++ rust/sedona-raster-gdal/src/rs_from_path.rs | 193 +++++++++--------- 3 files changed, 147 insertions(+), 93 deletions(-) create mode 100644 rust/sedona-raster-gdal/benches/rs_from_path.rs diff --git a/rust/sedona-raster-gdal/Cargo.toml b/rust/sedona-raster-gdal/Cargo.toml index ac1fccc30..8f6fd59b4 100644 --- a/rust/sedona-raster-gdal/Cargo.toml +++ b/rust/sedona-raster-gdal/Cargo.toml @@ -51,3 +51,8 @@ sedona-gdal = { workspace = true, features = ["gdal-sys"] } sedona-testing = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread"] } + +[[bench]] +harness = false +name = "rs_from_path" +path = "benches/rs_from_path.rs" diff --git a/rust/sedona-raster-gdal/benches/rs_from_path.rs b/rust/sedona-raster-gdal/benches/rs_from_path.rs new file mode 100644 index 000000000..631588afb --- /dev/null +++ b/rust/sedona-raster-gdal/benches/rs_from_path.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Benchmarks for RS_FromPath UDF +//! +//! RS_FromPath creates out-db rasters from file paths. +//! +//! NOTE: This benchmark is currently disabled because RS_FromPath has a known issue +//! with RasterBuilder not correctly handling null data for out-db rasters. +//! The out-db path support is still evolving; this file currently contains a placeholder benchmark. +//! +//! Once the out-db raster support is fixed, this benchmark should cover: +//! - Loading rasters with and without extent calculation +//! - Different raster files +//! - Batch processing + +use criterion::{criterion_group, criterion_main, Criterion}; + +fn bench_rs_from_path_placeholder(c: &mut Criterion) { + let mut group = c.benchmark_group("rs_from_path"); + + group.bench_function("placeholder", |b| b.iter(|| std::hint::black_box(42))); + + group.finish(); +} + +criterion_group!(benches, bench_rs_from_path_placeholder); +criterion_main!(benches); diff --git a/rust/sedona-raster-gdal/src/rs_from_path.rs b/rust/sedona-raster-gdal/src/rs_from_path.rs index 0bdcfcb09..d83f59ee9 100644 --- a/rust/sedona-raster-gdal/src/rs_from_path.rs +++ b/rust/sedona-raster-gdal/src/rs_from_path.rs @@ -17,7 +17,6 @@ //! RS_FromPath UDF - Load out-db raster from file path. -use std::collections::HashMap; use std::sync::Arc; use arrow::compute::cast; @@ -47,40 +46,16 @@ use crate::gdal_dataset_provider::configure_thread_local_options; pub fn rs_from_path_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "rs_frompath", - vec![ - Arc::new(RsFromPath::new(false)), - Arc::new(RsFromPath::new(true)), - ], + vec![Arc::new(RsFromPath)], Volatility::Volatile, ) } #[derive(Debug)] -pub(crate) struct RsFromPath { - with_params: bool, -} +pub(crate) struct RsFromPath; impl RsFromPath { - pub(crate) fn new(with_params: bool) -> Self { - Self { with_params } - } - - #[allow(dead_code)] - fn parse_params(params: &str) -> HashMap { - params - .split(';') - .filter_map(|pair| { - let parts: Vec<&str> = pair.trim().splitn(2, '=').collect(); - if parts.len() == 2 { - Some((parts[0].trim().to_string(), parts[1].trim().to_string())) - } else { - None - } - }) - .collect() - } - - fn load_outdb_raster(gdal: &Gdal, path: &str, _params: Option<&str>) -> Result { + fn load_outdb_raster(gdal: &Gdal, path: &str) -> Result { let gdal_path = normalize_outdb_source_path(path); let dataset = gdal .open_ex_with_options( @@ -170,14 +145,7 @@ impl RsFromPath { impl SedonaScalarKernel for RsFromPath { fn return_type(&self, args: &[SedonaType]) -> Result> { - let matchers = if self.with_params { - vec![ArgMatcher::is_string(), ArgMatcher::is_string()] - } else { - vec![ArgMatcher::is_string()] - }; - - let matcher = ArgMatcher::new(matchers, RASTER); - matcher.match_args(args) + ArgMatcher::new(vec![ArgMatcher::is_string()], RASTER).match_args(args) } fn invoke_batch( @@ -199,47 +167,16 @@ impl SedonaScalarKernel for RsFromPath { with_gdal(|gdal| { configure_thread_local_options(gdal, config_options)?; - let (paths, params_opt) = match &args[0] { - ColumnarValue::Scalar(scalar) => { - let path = scalar.to_array().map_err(|e| { - exec_datafusion_err!("Failed to convert scalar to array: {}", e) - })?; - let params = if self.with_params { - match &args[1] { - ColumnarValue::Scalar(s) => Some(s.to_array().map_err(|e| { - exec_datafusion_err!("Failed to convert params scalar: {}", e) - })?), - ColumnarValue::Array(a) => Some(a.clone()), - } - } else { - None - }; - (path, params) - } - ColumnarValue::Array(array) => { - let params = if self.with_params { - match &args[1] { - ColumnarValue::Scalar(s) => Some(s.to_array().map_err(|e| { - exec_datafusion_err!("Failed to convert params scalar: {}", e) - })?), - ColumnarValue::Array(a) => Some(a.clone()), - } - } else { - None - }; - (array.clone(), params) - } + let paths = match &args[0] { + ColumnarValue::Scalar(scalar) => scalar.to_array().map_err(|e| { + exec_datafusion_err!("Failed to convert scalar to array: {}", e) + })?, + ColumnarValue::Array(array) => array.clone(), }; let paths = cast(&paths, &DataType::Utf8)?; let path_array = as_string_array(&paths)?; - let params_casted = params_opt.map(|p| cast(&p, &DataType::Utf8)).transpose()?; - let params_array = params_casted - .as_ref() - .map(|p| as_string_array(p.as_ref())) - .transpose()?; - let len = path_array.len(); if len == 0 { let builder = RasterBuilder::new(0); @@ -262,15 +199,7 @@ impl SedonaScalarKernel for RsFromPath { combined_arrays.push(Arc::new(result)); } else { let path = path_array.value(i); - let params = params_array.and_then(|pa| { - if pa.is_null(i) { - None - } else { - Some(pa.value(i)) - } - }); - - let raster = Self::load_outdb_raster(gdal, path, params)?; + let raster = Self::load_outdb_raster(gdal, path)?; combined_arrays.push(Arc::new(raster)); } } @@ -293,24 +222,102 @@ impl SedonaScalarKernel for RsFromPath { #[cfg(test)] mod tests { use super::*; + use crate::gdal_common::with_gdal; + + #[test] + fn udf_from_path() { + let udf: datafusion_expr::ScalarUDF = rs_from_path_udf().into(); + assert_eq!(udf.name(), "rs_frompath"); + } #[test] - fn test_parse_params() { - let params = "key1=value1;key2=value2"; - let parsed = RsFromPath::parse_params(params); - assert_eq!(parsed.get("key1"), Some(&"value1".to_string())); - assert_eq!(parsed.get("key2"), Some(&"value2".to_string())); + fn test_load_outdb_raster_from_file() { + use sedona_testing::data::test_raster; + + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let raster = with_gdal(|gdal| RsFromPath::load_outdb_raster(gdal, &path)) + .expect("Should load raster from path"); + + assert_eq!(raster.len(), 1); - let parsed = RsFromPath::parse_params(""); - assert!(parsed.is_empty()); + use datafusion_common::cast::{ + as_list_array, as_string_array, as_string_view_array, as_struct_array, as_uint32_array, + as_uint64_array, + }; + use sedona_schema::raster::{ + band_indices, band_metadata_indices, metadata_indices, raster_indices, + }; - let parsed = RsFromPath::parse_params("option=true"); - assert_eq!(parsed.get("option"), Some(&"true".to_string())); + let metadata_struct = as_struct_array(raster.column(raster_indices::METADATA)).unwrap(); + let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(0); + let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(0); + + assert_eq!(width, 10); + assert_eq!(height, 10); + + let crs = as_string_view_array(raster.column(raster_indices::CRS)).unwrap(); + assert!(!crs.is_null(0)); + + let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); + let bands_struct = as_struct_array(bands_list.values()).unwrap(); + let band_metadata_struct = + as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); + + let outdb_url = + as_string_array(band_metadata_struct.column(band_metadata_indices::OUTDB_URL)).unwrap(); + assert!(!outdb_url.is_null(0)); + assert!(outdb_url.value(0).contains("test4.tiff")); + + let storage_type = + as_uint32_array(band_metadata_struct.column(band_metadata_indices::STORAGE_TYPE)) + .unwrap(); + assert_eq!( + storage_type.value(0), + sedona_schema::raster::StorageType::OutDbRef as u32 + ); } #[test] - fn udf_from_path() { - let udf: datafusion_expr::ScalarUDF = rs_from_path_udf().into(); - assert_eq!(udf.name(), "rs_frompath"); + fn test_invoke_rs_from_path() { + use arrow_array::StringArray; + use datafusion_common::cast::{as_struct_array, as_uint64_array}; + use sedona_expr::scalar_udf::SedonaScalarKernel; + use sedona_schema::raster::{metadata_indices, raster_indices}; + use sedona_testing::data::test_raster; + + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let paths = Arc::new(StringArray::from(vec![path.as_str()])); + let input = ColumnarValue::Array(paths); + + let kernel = RsFromPath; + let result = kernel + .invoke_batch_from_args(&[], &[input], &SedonaType::Arrow(DataType::Null), 0, None) + .expect("Should invoke successfully"); + + match result { + ColumnarValue::Array(arr) => { + let struct_arr = as_struct_array(&arr).unwrap(); + assert_eq!(struct_arr.len(), 1); + + let metadata_struct = + as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); + let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(0); + let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(0); + + assert_eq!(width, 10); + assert_eq!(height, 10); + } + _ => panic!("Expected array result"), + } } } From 73927fa6131ca71497007ab62a109c0a6498fe47 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Wed, 13 May 2026 00:52:32 +0800 Subject: [PATCH 3/8] Refactored RS_FromPath after manual reviewing --- docs/reference/sql/rs_frompath.qmd | 40 +++ rust/sedona-raster-gdal/Cargo.toml | 4 +- .../benches/rs_from_path.rs | 42 --- .../sedona-raster-gdal/benches/rs_frompath.rs | 93 +++++ rust/sedona-raster-gdal/src/gdal_common.rs | 77 +++-- .../src/gdal_dataset_provider.rs | 30 +- rust/sedona-raster-gdal/src/lib.rs | 8 +- rust/sedona-raster-gdal/src/rs_from_path.rs | 323 ------------------ rust/sedona-raster-gdal/src/rs_frompath.rs | 216 ++++++++++++ rust/sedona-raster-gdal/src/utils.rs | 192 ++++++++++- 10 files changed, 582 insertions(+), 443 deletions(-) create mode 100644 docs/reference/sql/rs_frompath.qmd delete mode 100644 rust/sedona-raster-gdal/benches/rs_from_path.rs create mode 100644 rust/sedona-raster-gdal/benches/rs_frompath.rs delete mode 100644 rust/sedona-raster-gdal/src/rs_from_path.rs create mode 100644 rust/sedona-raster-gdal/src/rs_frompath.rs diff --git a/docs/reference/sql/rs_frompath.qmd b/docs/reference/sql/rs_frompath.qmd new file mode 100644 index 000000000..cba038e0d --- /dev/null +++ b/docs/reference/sql/rs_frompath.qmd @@ -0,0 +1,40 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_FromPath +description: Creates an out-of-database raster from a raster file path. +kernels: + - returns: raster + args: + - name: path + type: string +--- + +## Description + +Loads raster metadata from the file at `path` and returns a raster whose bands +reference the source file as out-db bands. + +This is useful when you want to work with rasters stored on disk without copying +their pixel data into the raster value itself. + +## Examples + +```sql +SELECT RS_BandPath(RS_FromPath('/tmp/example.tif')); +``` diff --git a/rust/sedona-raster-gdal/Cargo.toml b/rust/sedona-raster-gdal/Cargo.toml index 8f6fd59b4..78c51190c 100644 --- a/rust/sedona-raster-gdal/Cargo.toml +++ b/rust/sedona-raster-gdal/Cargo.toml @@ -54,5 +54,5 @@ tokio = { workspace = true, features = ["rt-multi-thread"] } [[bench]] harness = false -name = "rs_from_path" -path = "benches/rs_from_path.rs" +name = "rs_frompath" +path = "benches/rs_frompath.rs" diff --git a/rust/sedona-raster-gdal/benches/rs_from_path.rs b/rust/sedona-raster-gdal/benches/rs_from_path.rs deleted file mode 100644 index 631588afb..000000000 --- a/rust/sedona-raster-gdal/benches/rs_from_path.rs +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Benchmarks for RS_FromPath UDF -//! -//! RS_FromPath creates out-db rasters from file paths. -//! -//! NOTE: This benchmark is currently disabled because RS_FromPath has a known issue -//! with RasterBuilder not correctly handling null data for out-db rasters. -//! The out-db path support is still evolving; this file currently contains a placeholder benchmark. -//! -//! Once the out-db raster support is fixed, this benchmark should cover: -//! - Loading rasters with and without extent calculation -//! - Different raster files -//! - Batch processing - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn bench_rs_from_path_placeholder(c: &mut Criterion) { - let mut group = c.benchmark_group("rs_from_path"); - - group.bench_function("placeholder", |b| b.iter(|| std::hint::black_box(42))); - - group.finish(); -} - -criterion_group!(benches, bench_rs_from_path_placeholder); -criterion_main!(benches); diff --git a/rust/sedona-raster-gdal/benches/rs_frompath.rs b/rust/sedona-raster-gdal/benches/rs_frompath.rs new file mode 100644 index 000000000..93b213b53 --- /dev/null +++ b/rust/sedona-raster-gdal/benches/rs_frompath.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Benchmarks for RS_FromPath UDF. +//! +//! RS_FromPath creates out-db rasters from file paths, so these benchmarks use +//! raster fixtures from the `sedona-testing` test module rather than synthetic input. + +use std::{hint::black_box, sync::Arc}; + +use arrow_array::{ArrayRef, StringArray}; +use arrow_schema::DataType; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use datafusion_expr::ScalarUDF; +use sedona_schema::datatypes::SedonaType; +use sedona_testing::{data::test_raster, testers::ScalarUdfTester}; + +const SMALL_RASTER_FIXTURES: &[&str] = &[ + "test1.tiff", + "test2.tif", + "test3.tif", + "test4.tiff", + "test5.tiff", +]; + +fn raster_path_array(names: &[&str], rows: usize) -> ArrayRef { + assert!( + !names.is_empty(), + "benchmark fixture list must not be empty" + ); + + let paths = names + .iter() + .map(|name| test_raster(name).unwrap()) + .collect::>(); + + let values = (0..rows) + .map(|index| paths[index % paths.len()].as_str()) + .collect::>(); + + Arc::new(StringArray::from(values)) +} + +fn bench_rs_frompath(c: &mut Criterion) { + let udf: ScalarUDF = sedona_raster_gdal::rs_from_path_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![SedonaType::Arrow(DataType::Utf8)]); + + let single_small = raster_path_array(&["test4.tiff"], 1); + let mixed_small = raster_path_array(SMALL_RASTER_FIXTURES, SMALL_RASTER_FIXTURES.len()); + let batched_small = raster_path_array(SMALL_RASTER_FIXTURES, 256); + + let mut group = c.benchmark_group("rs_frompath"); + + group.throughput(Throughput::Elements(single_small.len() as u64)); + group.bench_with_input( + BenchmarkId::new("fixtures", "single_small"), + &single_small, + |b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())), + ); + + group.throughput(Throughput::Elements(mixed_small.len() as u64)); + group.bench_with_input( + BenchmarkId::new("fixtures", "mixed_small"), + &mixed_small, + |b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())), + ); + + group.throughput(Throughput::Elements(batched_small.len() as u64)); + group.bench_with_input( + BenchmarkId::new("fixtures", "batched_small"), + &batched_small, + |b, input| b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap())), + ); + + group.finish(); +} + +criterion_group!(benches, bench_rs_frompath); +criterion_main!(benches); diff --git a/rust/sedona-raster-gdal/src/gdal_common.rs b/rust/sedona-raster-gdal/src/gdal_common.rs index 2a6fad688..072b0bdef 100644 --- a/rust/sedona-raster-gdal/src/gdal_common.rs +++ b/rust/sedona-raster-gdal/src/gdal_common.rs @@ -21,6 +21,7 @@ use sedona_gdal::gdal::Gdal; use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY, GDAL_OF_VERBOSE_ERROR}; use sedona_gdal::geo_transform::GeoTransform; use sedona_gdal::mem::MemDatasetBuilder; +use sedona_gdal::raster::rasterband::RasterBand; use sedona_gdal::raster::types::DatasetOptions; use sedona_gdal::raster::types::GdalDataType; @@ -268,37 +269,11 @@ pub unsafe fn raster_ref_to_gdal_mem( .band(src_band_index) .map_err(|e| arrow_datafusion_err!(e))?; let band_metadata = band.metadata(); - let band_type = band_metadata.data_type()?; if let Some(nodata_bytes) = band_metadata.nodata_value() { let raster_band = dataset .rasterband(dst_band_index) .map_err(convert_gdal_err)?; - match band_type { - BandDataType::UInt64 => { - let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| { - exec_datafusion_err!("Invalid nodata byte length for UInt64") - })?; - let nodata = u64::from_le_bytes(nodata_bytes); - raster_band - .set_no_data_value_u64(Some(nodata)) - .map_err(convert_gdal_err)?; - } - BandDataType::Int64 => { - let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| { - exec_datafusion_err!("Invalid nodata byte length for Int64") - })?; - let nodata = i64::from_le_bytes(nodata_bytes); - raster_band - .set_no_data_value_i64(Some(nodata)) - .map_err(convert_gdal_err)?; - } - _ => { - let nodata = bytes_to_f64(nodata_bytes, &band_type)?; - raster_band - .set_no_data_value(Some(nodata)) - .map_err(convert_gdal_err)?; - } - } + set_band_nodata_from_bytes(&raster_band, Some(nodata_bytes))?; } } @@ -320,6 +295,54 @@ pub fn nodata_bytes_to_f64(nodata_bytes: Option<&[u8]>, band_type: &BandDataType bytes_to_f64(bytes, band_type).ok() } +/// Read a GDAL band's nodata value into a byte vector using the band's native type. +pub fn band_nodata_to_bytes(band: &RasterBand<'_>) -> Result>> { + let band_type = gdal_to_band_data_type(band.band_type())?; + + Ok(match band_type { + BandDataType::UInt64 => band + .no_data_value_u64() + .map(|nodata| nodata.to_le_bytes().to_vec()), + BandDataType::Int64 => band + .no_data_value_i64() + .map(|nodata| nodata.to_le_bytes().to_vec()), + _ => band + .no_data_value() + .map(|nodata| nodata_f64_to_bytes(nodata, &band_type)), + }) +} + +/// Set a GDAL band's nodata value from stored bytes using the band's native type. +pub fn set_band_nodata_from_bytes( + band: &RasterBand<'_>, + nodata_bytes: Option<&[u8]>, +) -> Result<()> { + let band_type = gdal_to_band_data_type(band.band_type())?; + + match (nodata_bytes, band_type) { + (Some(bytes), BandDataType::UInt64) => { + let bytes: [u8; 8] = bytes + .try_into() + .map_err(|_| exec_datafusion_err!("Invalid nodata byte length for UInt64"))?; + band.set_no_data_value_u64(Some(u64::from_le_bytes(bytes))) + .map_err(convert_gdal_err) + } + (Some(bytes), BandDataType::Int64) => { + let bytes: [u8; 8] = bytes + .try_into() + .map_err(|_| exec_datafusion_err!("Invalid nodata byte length for Int64"))?; + band.set_no_data_value_i64(Some(i64::from_le_bytes(bytes))) + .map_err(convert_gdal_err) + } + (Some(bytes), band_type) => band + .set_no_data_value(Some(bytes_to_f64(bytes, &band_type)?)) + .map_err(convert_gdal_err), + (None, BandDataType::UInt64) => band.set_no_data_value_u64(None).map_err(convert_gdal_err), + (None, BandDataType::Int64) => band.set_no_data_value_i64(None).map_err(convert_gdal_err), + (None, _) => band.set_no_data_value(None).map_err(convert_gdal_err), + } +} + /// Convert a f64 nodata value into a byte vector appropriate for the given band type. pub fn nodata_f64_to_bytes(nodata: f64, band_type: &BandDataType) -> Vec { match band_type { diff --git a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs index a9d1013c5..f8f878677 100644 --- a/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs +++ b/rust/sedona-raster-gdal/src/gdal_dataset_provider.rs @@ -34,7 +34,8 @@ use sedona_schema::raster::{BandDataType, StorageType}; use crate::gdal_common::{ band_data_type_to_gdal, bytes_to_f64, convert_gdal_err, normalize_outdb_source_path, - open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem, ToGdalGeoTransform, + open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem, + set_band_nodata_from_bytes, ToGdalGeoTransform, }; /// A GDAL dataset constructed from a `RasterRef`. @@ -281,32 +282,7 @@ impl GDALDatasetCache { let vrt_band = vrt.rasterband(i).map_err(convert_gdal_err)?; if let Some(nodata_bytes) = band_metadata.nodata_value() { - match band_type { - BandDataType::UInt64 => { - let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| { - exec_datafusion_err!("Invalid nodata byte length for UInt64") - })?; - let nodata = u64::from_le_bytes(nodata_bytes); - vrt_band - .set_no_data_value_u64(Some(nodata)) - .map_err(convert_gdal_err)?; - } - BandDataType::Int64 => { - let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| { - exec_datafusion_err!("Invalid nodata byte length for Int64") - })?; - let nodata = i64::from_le_bytes(nodata_bytes); - vrt_band - .set_no_data_value_i64(Some(nodata)) - .map_err(convert_gdal_err)?; - } - _ => { - let nodata = bytes_to_f64(nodata_bytes, &band_type)?; - vrt_band - .set_no_data_value(nodata) - .map_err(convert_gdal_err)?; - } - } + set_band_nodata_from_bytes(&vrt_band, Some(nodata_bytes))?; } match band_metadata.storage_type()? { diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 594a7d7eb..52f286fd8 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -25,14 +25,12 @@ //! - GDAL datatype and nodata conversion helpers //! - path normalization for GDAL VSI-backed raster sources -// Temporary until https://github.com/apache/sedona-db/issues/804 is resolved. -#[allow(dead_code)] mod gdal_common; // Temporary until https://github.com/apache/sedona-db/issues/804 is resolved. #[allow(dead_code)] mod gdal_dataset_provider; -mod rs_from_path; +mod rs_frompath; mod utils; #[cfg(test)] @@ -43,8 +41,8 @@ pub use gdal_common::{ band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size, nodata_bytes_to_f64, nodata_f64_to_bytes, }; -pub use rs_from_path::rs_from_path_udf; -pub use utils::{append_as_indb_raster, dataset_to_indb_raster}; +pub use rs_frompath::rs_from_path_udf; +pub use utils::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster}; pub fn all_gdal_udfs() -> Vec { vec![rs_from_path_udf()] diff --git a/rust/sedona-raster-gdal/src/rs_from_path.rs b/rust/sedona-raster-gdal/src/rs_from_path.rs deleted file mode 100644 index d83f59ee9..000000000 --- a/rust/sedona-raster-gdal/src/rs_from_path.rs +++ /dev/null @@ -1,323 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! RS_FromPath UDF - Load out-db raster from file path. - -use std::sync::Arc; - -use arrow::compute::cast; -use arrow_array::{Array, ArrayRef, StructArray}; -use arrow_schema::DataType; -use datafusion_common::cast::as_string_array; -use datafusion_common::config::ConfigOptions; -use datafusion_common::error::Result; -use datafusion_common::exec_datafusion_err; -use datafusion_expr::{ColumnarValue, Volatility}; -use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_gdal::gdal::Gdal; -use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY}; -use sedona_gdal::raster::types::DatasetOptions; -use sedona_gdal::spatial_ref::SpatialRef; -use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::{BandMetadata, RasterMetadata}; -use sedona_schema::datatypes::{SedonaType, RASTER}; -use sedona_schema::matchers::ArgMatcher; -use sedona_schema::raster::StorageType; - -use crate::gdal_common::{ - gdal_to_band_data_type, nodata_f64_to_bytes, normalize_outdb_source_path, with_gdal, -}; -use crate::gdal_dataset_provider::configure_thread_local_options; - -pub fn rs_from_path_udf() -> SedonaScalarUDF { - SedonaScalarUDF::new( - "rs_frompath", - vec![Arc::new(RsFromPath)], - Volatility::Volatile, - ) -} - -#[derive(Debug)] -pub(crate) struct RsFromPath; - -impl RsFromPath { - fn load_outdb_raster(gdal: &Gdal, path: &str) -> Result { - let gdal_path = normalize_outdb_source_path(path); - let dataset = gdal - .open_ex_with_options( - &gdal_path, - DatasetOptions { - open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY, - ..Default::default() - }, - ) - .map_err(|e| { - exec_datafusion_err!( - "Failed to open raster file '{}'(GDAL path '{}'): {}", - path, - gdal_path, - e - ) - })?; - - let (width, height) = dataset.raster_size(); - let geotransform = dataset - .geo_transform() - .map_err(|e| exec_datafusion_err!("Failed to get geotransform: {}", e))?; - - let metadata = RasterMetadata { - width: width as u64, - height: height as u64, - upperleft_x: geotransform[0], - upperleft_y: geotransform[3], - scale_x: geotransform[1], - scale_y: geotransform[5], - skew_x: geotransform[2], - skew_y: geotransform[4], - }; - - let crs = dataset - .spatial_ref() - .ok() - .and_then(|sr: SpatialRef| sr.to_projjson().ok()); - - let mut builder = RasterBuilder::new(1); - builder - .start_raster(&metadata, crs.as_deref()) - .map_err(|e| exec_datafusion_err!("Failed to start raster: {}", e))?; - - let band_count = dataset.raster_count(); - for band_idx in 1..=band_count { - let band = dataset - .rasterband(band_idx) - .map_err(|e| exec_datafusion_err!("Failed to get band {}: {}", band_idx, e))?; - - let gdal_type = band.band_type(); - let band_data_type = gdal_to_band_data_type(gdal_type) - .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; - - let nodata_bytes = band - .no_data_value() - .map(|no_data| nodata_f64_to_bytes(no_data, &band_data_type)); - - let band_metadata = BandMetadata { - nodata_value: nodata_bytes, - storage_type: StorageType::OutDbRef, - datatype: band_data_type, - outdb_url: Some(path.to_string()), - outdb_band_id: Some(band_idx as u32), - }; - - builder - .start_band(band_metadata) - .map_err(|e| exec_datafusion_err!("Failed to start band: {}", e))?; - - builder.band_data_writer().append_value([]); - - builder - .finish_band() - .map_err(|e| exec_datafusion_err!("Failed to finish band: {}", e))?; - } - - builder - .finish_raster() - .map_err(|e| exec_datafusion_err!("Failed to finish raster: {}", e))?; - - builder - .finish() - .map_err(|e| exec_datafusion_err!("Failed to build raster: {}", e)) - } -} - -impl SedonaScalarKernel for RsFromPath { - fn return_type(&self, args: &[SedonaType]) -> Result> { - ArgMatcher::new(vec![ArgMatcher::is_string()], RASTER).match_args(args) - } - - fn invoke_batch( - &self, - arg_types: &[SedonaType], - args: &[ColumnarValue], - ) -> Result { - self.invoke_batch_from_args(arg_types, args, &SedonaType::Arrow(DataType::Null), 0, None) - } - - fn invoke_batch_from_args( - &self, - _arg_types: &[SedonaType], - args: &[ColumnarValue], - _return_type: &SedonaType, - _num_rows: usize, - config_options: Option<&ConfigOptions>, - ) -> Result { - with_gdal(|gdal| { - configure_thread_local_options(gdal, config_options)?; - - let paths = match &args[0] { - ColumnarValue::Scalar(scalar) => scalar.to_array().map_err(|e| { - exec_datafusion_err!("Failed to convert scalar to array: {}", e) - })?, - ColumnarValue::Array(array) => array.clone(), - }; - - let paths = cast(&paths, &DataType::Utf8)?; - let path_array = as_string_array(&paths)?; - - let len = path_array.len(); - if len == 0 { - let builder = RasterBuilder::new(0); - let result = builder - .finish() - .map_err(|e| exec_datafusion_err!("Failed to build empty raster: {}", e))?; - return Ok(ColumnarValue::Array(Arc::new(result))); - } - - let mut combined_arrays: Vec = Vec::with_capacity(len); - for i in 0..len { - if path_array.is_null(i) { - let mut builder = RasterBuilder::new(1); - builder - .append_null() - .map_err(|e| exec_datafusion_err!("Failed to append null: {}", e))?; - let result = builder - .finish() - .map_err(|e| exec_datafusion_err!("Failed to build null raster: {}", e))?; - combined_arrays.push(Arc::new(result)); - } else { - let path = path_array.value(i); - let raster = Self::load_outdb_raster(gdal, path)?; - combined_arrays.push(Arc::new(raster)); - } - } - - let refs: Vec<&dyn Array> = combined_arrays.iter().map(|a| a.as_ref()).collect(); - let result = arrow::compute::concat(&refs) - .map_err(|e| exec_datafusion_err!("Failed to concatenate rasters: {}", e))?; - - match &args[0] { - ColumnarValue::Scalar(_) => { - let scalar = datafusion_common::ScalarValue::try_from_array(&result, 0)?; - Ok(ColumnarValue::Scalar(scalar)) - } - ColumnarValue::Array(_) => Ok(ColumnarValue::Array(result)), - } - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::gdal_common::with_gdal; - - #[test] - fn udf_from_path() { - let udf: datafusion_expr::ScalarUDF = rs_from_path_udf().into(); - assert_eq!(udf.name(), "rs_frompath"); - } - - #[test] - fn test_load_outdb_raster_from_file() { - use sedona_testing::data::test_raster; - - let path = test_raster("test4.tiff").expect("test4.tiff should exist"); - - let raster = with_gdal(|gdal| RsFromPath::load_outdb_raster(gdal, &path)) - .expect("Should load raster from path"); - - assert_eq!(raster.len(), 1); - - use datafusion_common::cast::{ - as_list_array, as_string_array, as_string_view_array, as_struct_array, as_uint32_array, - as_uint64_array, - }; - use sedona_schema::raster::{ - band_indices, band_metadata_indices, metadata_indices, raster_indices, - }; - - let metadata_struct = as_struct_array(raster.column(raster_indices::METADATA)).unwrap(); - let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(0); - let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(0); - - assert_eq!(width, 10); - assert_eq!(height, 10); - - let crs = as_string_view_array(raster.column(raster_indices::CRS)).unwrap(); - assert!(!crs.is_null(0)); - - let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); - let bands_struct = as_struct_array(bands_list.values()).unwrap(); - let band_metadata_struct = - as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); - - let outdb_url = - as_string_array(band_metadata_struct.column(band_metadata_indices::OUTDB_URL)).unwrap(); - assert!(!outdb_url.is_null(0)); - assert!(outdb_url.value(0).contains("test4.tiff")); - - let storage_type = - as_uint32_array(band_metadata_struct.column(band_metadata_indices::STORAGE_TYPE)) - .unwrap(); - assert_eq!( - storage_type.value(0), - sedona_schema::raster::StorageType::OutDbRef as u32 - ); - } - - #[test] - fn test_invoke_rs_from_path() { - use arrow_array::StringArray; - use datafusion_common::cast::{as_struct_array, as_uint64_array}; - use sedona_expr::scalar_udf::SedonaScalarKernel; - use sedona_schema::raster::{metadata_indices, raster_indices}; - use sedona_testing::data::test_raster; - - let path = test_raster("test4.tiff").expect("test4.tiff should exist"); - - let paths = Arc::new(StringArray::from(vec![path.as_str()])); - let input = ColumnarValue::Array(paths); - - let kernel = RsFromPath; - let result = kernel - .invoke_batch_from_args(&[], &[input], &SedonaType::Arrow(DataType::Null), 0, None) - .expect("Should invoke successfully"); - - match result { - ColumnarValue::Array(arr) => { - let struct_arr = as_struct_array(&arr).unwrap(); - assert_eq!(struct_arr.len(), 1); - - let metadata_struct = - as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); - let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(0); - let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(0); - - assert_eq!(width, 10); - assert_eq!(height, 10); - } - _ => panic!("Expected array result"), - } - } -} diff --git a/rust/sedona-raster-gdal/src/rs_frompath.rs b/rust/sedona-raster-gdal/src/rs_frompath.rs new file mode 100644 index 000000000..31b5edd52 --- /dev/null +++ b/rust/sedona-raster-gdal/src/rs_frompath.rs @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! RS_FromPath UDF - Load out-db raster from file path. + +use std::sync::Arc; + +use arrow_array::Array; +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_schema::datatypes::{SedonaType, RASTER}; +use sedona_schema::matchers::ArgMatcher; + +use crate::gdal_common::with_gdal; +use crate::gdal_dataset_provider::configure_thread_local_options; +use crate::utils::append_as_outdb_raster; + +pub fn rs_from_path_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_frompath", + vec![Arc::new(RsFromPath)], + Volatility::Volatile, + ) +} + +#[derive(Debug)] +pub(crate) struct RsFromPath; + +impl SedonaScalarKernel for RsFromPath { + fn return_type(&self, args: &[SedonaType]) -> Result> { + ArgMatcher::new(vec![ArgMatcher::is_string()], RASTER).match_args(args) + } + + fn invoke_batch_from_args( + &self, + _arg_types: &[SedonaType], + args: &[ColumnarValue], + _return_type: &SedonaType, + _num_rows: usize, + config_options: Option<&ConfigOptions>, + ) -> Result { + with_gdal(|gdal| { + configure_thread_local_options(gdal, config_options)?; + + let paths = args[0].cast_to(&DataType::Utf8, None)?.into_array(1)?; + let path_array = as_string_array(&paths)?; + + let len = path_array.len(); + let mut builder = RasterBuilder::new(len); + for i in 0..len { + if path_array.is_null(i) { + builder.append_null()?; + } else { + let path = path_array.value(i); + append_as_outdb_raster(gdal, path, &mut builder)?; + } + } + + let result: Arc = Arc::new(builder.finish()?); + + match &args[0] { + ColumnarValue::Scalar(_) => { + let scalar = datafusion_common::ScalarValue::try_from_array(&result, 0)?; + Ok(ColumnarValue::Scalar(scalar)) + } + ColumnarValue::Array(_) => Ok(ColumnarValue::Array(result)), + } + }) + } + + fn invoke_batch( + &self, + _arg_types: &[SedonaType], + _args: &[ColumnarValue], + ) -> Result { + sedona_internal_err!("Should not be called because invoke_batch_from_args() is implemented") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StringArray; + use datafusion_common::cast::{as_struct_array, as_uint64_array}; + use datafusion_common::ScalarValue; + use sedona_expr::scalar_udf::SedonaScalarKernel; + use sedona_schema::raster::{metadata_indices, raster_indices}; + use sedona_testing::data::test_raster; + + fn assert_raster_dimensions( + result: &ColumnarValue, + expected_len: usize, + width: u64, + height: u64, + ) { + match result { + ColumnarValue::Array(arr) => { + let struct_arr = as_struct_array(arr).unwrap(); + assert_eq!(struct_arr.len(), expected_len); + + let metadata_struct = + as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); + for idx in 0..expected_len { + let actual_width = + as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(idx); + let actual_height = + as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(idx); + + assert_eq!(actual_width, width); + assert_eq!(actual_height, height); + } + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_arr)) => { + assert_eq!(struct_arr.len(), 1); + + let metadata_struct = + as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); + let actual_width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(0); + let actual_height = + as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(0); + + assert_eq!(actual_width, width); + assert_eq!(actual_height, height); + } + other => panic!("Unexpected result: {other:?}"), + } + } + + #[test] + fn test_invoke_rs_from_path() { + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let paths = Arc::new(StringArray::from(vec![path.as_str()])); + let input = ColumnarValue::Array(paths); + + let kernel = RsFromPath; + let result = kernel + .invoke_batch_from_args(&[], &[input], &SedonaType::Arrow(DataType::Null), 0, None) + .expect("Should invoke successfully"); + + assert_raster_dimensions(&result, 1, 10, 10); + + let scalar_input = ColumnarValue::Scalar(ScalarValue::Utf8(Some(path.clone()))); + let scalar_result = kernel + .invoke_batch_from_args( + &[], + &[scalar_input], + &SedonaType::Arrow(DataType::Null), + 0, + None, + ) + .expect("Should invoke successfully for scalar path"); + + assert_raster_dimensions(&scalar_result, 1, 10, 10); + + let multi_paths = Arc::new(StringArray::from(vec![path.as_str(), path.as_str()])); + let multi_result = kernel + .invoke_batch_from_args( + &[], + &[ColumnarValue::Array(multi_paths)], + &SedonaType::Arrow(DataType::Null), + 0, + None, + ) + .expect("Should invoke successfully for multiple paths"); + + assert_raster_dimensions(&multi_result, 2, 10, 10); + + let empty_paths = Arc::new(StringArray::from(Vec::<&str>::new())); + let empty_result = kernel + .invoke_batch_from_args( + &[], + &[ColumnarValue::Array(empty_paths)], + &SedonaType::Arrow(DataType::Null), + 0, + None, + ) + .expect("Should invoke successfully for empty paths"); + + match empty_result { + ColumnarValue::Array(arr) => { + let struct_arr = as_struct_array(&arr).unwrap(); + assert_eq!(struct_arr.len(), 0); + } + other => panic!("Expected empty array result, got {other:?}"), + } + } +} diff --git a/rust/sedona-raster-gdal/src/utils.rs b/rust/sedona-raster-gdal/src/utils.rs index 30f543071..d7cc4bc02 100644 --- a/rust/sedona-raster-gdal/src/utils.rs +++ b/rust/sedona-raster-gdal/src/utils.rs @@ -22,13 +22,19 @@ use arrow_buffer::Buffer; use datafusion_common::error::Result; use datafusion_common::exec_datafusion_err; use sedona_gdal::dataset::Dataset; +use sedona_gdal::gdal::Gdal; +use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY}; +use sedona_gdal::raster::types::DatasetOptions; use sedona_gdal::spatial_ref::SpatialRef; use sedona_raster::builder::RasterBuilder; use sedona_raster::traits::BandMetadata; -use sedona_schema::raster::{BandDataType, StorageType}; +use sedona_schema::raster::StorageType; -use crate::gdal_common::{gdal_to_band_data_type, RasterMetadataFromGdalGeoTransform}; +use crate::gdal_common::{ + band_nodata_to_bytes, gdal_to_band_data_type, normalize_outdb_source_path, + RasterMetadataFromGdalGeoTransform, +}; /// Append a GDAL dataset as a single in-db raster to the provided [`RasterBuilder`]. pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> Result<()> { @@ -59,18 +65,7 @@ pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> let band_data_type = gdal_to_band_data_type(gdal_type) .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; - // Get nodata value - let nodata_bytes = match band_data_type { - BandDataType::UInt64 => band - .no_data_value_u64() - .map(|no_data| no_data.to_le_bytes().to_vec()), - BandDataType::Int64 => band - .no_data_value_i64() - .map(|no_data| no_data.to_le_bytes().to_vec()), - _ => band - .no_data_value() - .map(|no_data| crate::gdal_common::nodata_f64_to_bytes(no_data, &band_data_type)), - }; + let nodata_bytes = band_nodata_to_bytes(&band)?; let band_metadata = BandMetadata { nodata_value: nodata_bytes, @@ -109,6 +104,68 @@ pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> Ok(()) } +/// Append a raster source path as a single out-db raster to the provided [`RasterBuilder`]. +pub fn append_as_outdb_raster(gdal: &Gdal, path: &str, builder: &mut RasterBuilder) -> Result<()> { + let gdal_path = normalize_outdb_source_path(path); + let dataset = gdal + .open_ex_with_options( + &gdal_path, + DatasetOptions { + open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY, + ..Default::default() + }, + ) + .map_err(|e| { + exec_datafusion_err!( + "Failed to open raster file '{}'(GDAL path '{}'): {}", + path, + gdal_path, + e + ) + })?; + + let (width, height) = dataset.raster_size(); + let geotransform = dataset + .geo_transform() + .map_err(|e| exec_datafusion_err!("Failed to get geotransform: {}", e))?; + let metadata = geotransform.to_raster_metadata(width, height); + + let crs = dataset + .spatial_ref() + .ok() + .and_then(|sr: SpatialRef| sr.to_projjson().ok()); + + builder.start_raster(&metadata, crs.as_deref())?; + + let band_count = dataset.raster_count(); + for band_idx in 1..=band_count { + let band = dataset + .rasterband(band_idx) + .map_err(|e| exec_datafusion_err!("Failed to get band {}: {}", band_idx, e))?; + + let gdal_type = band.band_type(); + let band_data_type = gdal_to_band_data_type(gdal_type) + .map_err(|_| exec_datafusion_err!("Unsupported band data type: {:?}", gdal_type))?; + + let nodata_bytes = band_nodata_to_bytes(&band)?; + + let band_metadata = BandMetadata { + nodata_value: nodata_bytes, + storage_type: StorageType::OutDbRef, + datatype: band_data_type, + outdb_url: Some(path.to_string()), + outdb_band_id: Some(band_idx as u32), + }; + + builder.start_band(band_metadata)?; + builder.band_data_writer().append_value([]); + builder.finish_band()?; + } + + builder.finish_raster()?; + Ok(()) +} + /// Materialize a single GDAL dataset as an in-db raster `StructArray`. pub fn dataset_to_indb_raster(dataset: &Dataset) -> Result { let mut builder = RasterBuilder::new(1); @@ -121,9 +178,13 @@ pub fn dataset_to_indb_raster(dataset: &Dataset) -> Result { #[cfg(test)] mod tests { - use super::{append_as_indb_raster, dataset_to_indb_raster}; + use super::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster}; - use arrow_array::StructArray; + use arrow_array::{Array, StructArray}; + use datafusion_common::cast::{ + as_binary_array, as_list_array, as_string_array, as_string_view_array, as_struct_array, + as_uint32_array, as_uint64_array, + }; use datafusion_common::exec_datafusion_err; use sedona_gdal::dataset::Dataset; use sedona_gdal::gdal::Gdal; @@ -133,7 +194,11 @@ mod tests { use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; use sedona_raster::traits::RasterRef; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::{ + band_indices, band_metadata_indices, metadata_indices, raster_indices, BandDataType, + StorageType, + }; + use sedona_testing::data::test_raster; use tempfile::TempDir; use crate::gdal_common::with_gdal; @@ -153,6 +218,12 @@ mod tests { dataset_to_indb_raster(&dataset) } + fn load_as_outdb_raster(gdal: &Gdal, path: &str) -> datafusion_common::Result { + let mut builder = RasterBuilder::new(1); + append_as_outdb_raster(gdal, path, &mut builder)?; + builder.finish().map_err(Into::into) + } + fn write_uint64_tiff(gdal: &Gdal, path: &str, nodata: u64, data: Vec) { let driver = gdal.get_driver_by_name("GTiff").unwrap(); let dataset = driver.create_with_band_type::(path, 2, 2, 1).unwrap(); @@ -273,6 +344,93 @@ mod tests { assert_eq!(band.data(), [1u8, 2, 3, 4, 5, 6]); } + #[test] + fn append_as_outdb_raster_reads_single_band_geotiff() { + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path)).unwrap(); + assert_eq!(raster.len(), 1); + + let metadata_struct = as_struct_array(raster.column(raster_indices::METADATA)).unwrap(); + let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(0); + let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(0); + + assert_eq!(width, 10); + assert_eq!(height, 10); + + let crs = as_string_view_array(raster.column(raster_indices::CRS)).unwrap(); + assert!(!crs.is_null(0)); + + let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); + let bands_struct = as_struct_array(bands_list.values()).unwrap(); + let band_metadata_struct = + as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); + + let outdb_url = + as_string_array(band_metadata_struct.column(band_metadata_indices::OUTDB_URL)).unwrap(); + assert!(!outdb_url.is_null(0)); + assert!(outdb_url.value(0).contains("test4.tiff")); + + let storage_type = + as_uint32_array(band_metadata_struct.column(band_metadata_indices::STORAGE_TYPE)) + .unwrap(); + assert_eq!(storage_type.value(0), StorageType::OutDbRef as u32); + } + + #[test] + fn append_as_outdb_raster_preserves_uint64_nodata() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("uint64.tif"); + let path_str = path.to_string_lossy().to_string(); + let nodata = 9_007_199_254_740_993u64; + + with_gdal(|gdal| { + write_uint64_tiff(gdal, &path_str, nodata, vec![1, 2, 3, 4]); + Ok::<_, datafusion_common::DataFusionError>(()) + }) + .unwrap(); + + let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path_str)).unwrap(); + let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); + let bands_struct = as_struct_array(bands_list.values()).unwrap(); + let band_metadata_struct = + as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); + let nodata_values = + as_binary_array(band_metadata_struct.column(band_metadata_indices::NODATAVALUE)) + .unwrap(); + + assert_eq!(nodata_values.value(0), nodata.to_le_bytes()); + } + + #[test] + fn append_as_outdb_raster_preserves_int64_nodata() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("int64.tif"); + let path_str = path.to_string_lossy().to_string(); + let nodata = -9_007_199_254_740_993i64; + + with_gdal(|gdal| { + write_int64_tiff(gdal, &path_str, nodata, vec![-1, -2, -3, -4]); + Ok::<_, datafusion_common::DataFusionError>(()) + }) + .unwrap(); + + let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path_str)).unwrap(); + let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); + let bands_struct = as_struct_array(bands_list.values()).unwrap(); + let band_metadata_struct = + as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); + let nodata_values = + as_binary_array(band_metadata_struct.column(band_metadata_indices::NODATAVALUE)) + .unwrap(); + + assert_eq!(nodata_values.value(0), nodata.to_le_bytes()); + } + #[test] fn dataset_to_indb_raster_preserves_uint64_nodata_and_data() { let temp_dir = TempDir::new().unwrap(); From cfe08e1bbff760d2b73cbfc77c8ad6576e3ef409 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Wed, 13 May 2026 10:32:58 +0800 Subject: [PATCH 4/8] fix(rust/sedona-raster-gdal): cover RS_FromPath edge cases --- .../sedona-raster-gdal/benches/rs_frompath.rs | 2 +- rust/sedona-raster-gdal/src/lib.rs | 4 +- rust/sedona-raster-gdal/src/rs_frompath.rs | 59 ++++++++++++++++++- rust/sedona-raster-gdal/src/utils.rs | 2 +- 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/rust/sedona-raster-gdal/benches/rs_frompath.rs b/rust/sedona-raster-gdal/benches/rs_frompath.rs index 93b213b53..65291eb4a 100644 --- a/rust/sedona-raster-gdal/benches/rs_frompath.rs +++ b/rust/sedona-raster-gdal/benches/rs_frompath.rs @@ -56,7 +56,7 @@ fn raster_path_array(names: &[&str], rows: usize) -> ArrayRef { } fn bench_rs_frompath(c: &mut Criterion) { - let udf: ScalarUDF = sedona_raster_gdal::rs_from_path_udf().into(); + let udf: ScalarUDF = sedona_raster_gdal::rs_frompath_udf().into(); let tester = ScalarUdfTester::new(udf, vec![SedonaType::Arrow(DataType::Utf8)]); let single_small = raster_path_array(&["test4.tiff"], 1); diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 52f286fd8..0dd459184 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -41,9 +41,9 @@ pub use gdal_common::{ band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size, nodata_bytes_to_f64, nodata_f64_to_bytes, }; -pub use rs_frompath::rs_from_path_udf; +pub use rs_frompath::rs_frompath_udf; pub use utils::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster}; pub fn all_gdal_udfs() -> Vec { - vec![rs_from_path_udf()] + vec![rs_frompath_udf()] } diff --git a/rust/sedona-raster-gdal/src/rs_frompath.rs b/rust/sedona-raster-gdal/src/rs_frompath.rs index 31b5edd52..2fd5945e9 100644 --- a/rust/sedona-raster-gdal/src/rs_frompath.rs +++ b/rust/sedona-raster-gdal/src/rs_frompath.rs @@ -35,7 +35,7 @@ use crate::gdal_common::with_gdal; use crate::gdal_dataset_provider::configure_thread_local_options; use crate::utils::append_as_outdb_raster; -pub fn rs_from_path_udf() -> SedonaScalarUDF { +pub fn rs_frompath_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "rs_frompath", vec![Arc::new(RsFromPath)], @@ -103,10 +103,16 @@ mod tests { use arrow_array::StringArray; use datafusion_common::cast::{as_struct_array, as_uint64_array}; use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDFImpl; use sedona_expr::scalar_udf::SedonaScalarKernel; use sedona_schema::raster::{metadata_indices, raster_indices}; use sedona_testing::data::test_raster; + #[test] + fn test_rs_from_path_udf_name() { + assert_eq!(rs_frompath_udf().name(), "rs_frompath"); + } + fn assert_raster_dimensions( result: &ColumnarValue, expected_len: usize, @@ -213,4 +219,55 @@ mod tests { other => panic!("Expected empty array result, got {other:?}"), } } + + #[test] + fn test_invoke_rs_from_path_propagates_nulls() { + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let input = + ColumnarValue::Array(Arc::new(StringArray::from(vec![Some(path.as_str()), None]))); + + let result = RsFromPath + .invoke_batch_from_args(&[], &[input], &SedonaType::Arrow(DataType::Null), 0, None) + .expect("Should invoke successfully for null-containing input"); + + match result { + ColumnarValue::Array(arr) => { + let struct_arr = as_struct_array(&arr).unwrap(); + assert_eq!(struct_arr.len(), 2); + assert!(!struct_arr.is_null(0)); + assert!(struct_arr.is_null(1)); + + let metadata_struct = + as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); + let actual_width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) + .unwrap() + .value(0); + let actual_height = + as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) + .unwrap() + .value(0); + + assert_eq!(actual_width, 10); + assert_eq!(actual_height, 10); + } + other => panic!("Expected array result, got {other:?}"), + } + } + + #[test] + fn test_invoke_rs_from_path_invalid_path_errors() { + let missing_path = "/definitely/missing/rs_from_path_test.tif"; + let input = ColumnarValue::Scalar(ScalarValue::Utf8(Some(missing_path.to_string()))); + + let err = RsFromPath + .invoke_batch_from_args(&[], &[input], &SedonaType::Arrow(DataType::Null), 0, None) + .expect_err("Missing path should return an error"); + + let err_message = err.to_string(); + assert!(err_message.contains(&format!( + "Failed to open raster file '{}' (GDAL path '{}')", + missing_path, missing_path + ))); + } } diff --git a/rust/sedona-raster-gdal/src/utils.rs b/rust/sedona-raster-gdal/src/utils.rs index d7cc4bc02..3232dad8d 100644 --- a/rust/sedona-raster-gdal/src/utils.rs +++ b/rust/sedona-raster-gdal/src/utils.rs @@ -117,7 +117,7 @@ pub fn append_as_outdb_raster(gdal: &Gdal, path: &str, builder: &mut RasterBuild ) .map_err(|e| { exec_datafusion_err!( - "Failed to open raster file '{}'(GDAL path '{}'): {}", + "Failed to open raster file '{}' (GDAL path '{}'): {}", path, gdal_path, e From 7af8c93b604ca3d82d425b07d2ce3ca56c832521 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Wed, 13 May 2026 11:36:24 +0800 Subject: [PATCH 5/8] fix(docs/reference/sql): use checked-in raster for RS_FromPath --- docs/reference/sql/rs_frompath.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/sql/rs_frompath.qmd b/docs/reference/sql/rs_frompath.qmd index cba038e0d..7d3855c9d 100644 --- a/docs/reference/sql/rs_frompath.qmd +++ b/docs/reference/sql/rs_frompath.qmd @@ -36,5 +36,5 @@ their pixel data into the raster value itself. ## Examples ```sql -SELECT RS_BandPath(RS_FromPath('/tmp/example.tif')); +SELECT RS_BandPath(RS_FromPath('../../../submodules/sedona-testing/data/raster/test4.tiff')); ``` From ebb7f15cb2eb18625d34700680fcd453446cd832 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Sat, 16 May 2026 02:20:29 +0800 Subject: [PATCH 6/8] fix(rust/sedona-raster-gdal): address review follow-ups --- Cargo.lock | 2 -- rust/sedona-raster-gdal/Cargo.toml | 2 -- rust/sedona-raster-gdal/src/lib.rs | 6 ++-- rust/sedona-raster-gdal/src/register.rs | 25 +++++++++++++ rust/sedona-raster-gdal/src/rs_frompath.rs | 42 +++++++++++++--------- rust/sedona/src/context.rs | 4 +-- 6 files changed, 54 insertions(+), 27 deletions(-) create mode 100644 rust/sedona-raster-gdal/src/register.rs diff --git a/Cargo.lock b/Cargo.lock index faa3ab25d..6cd6c8129 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6128,7 +6128,6 @@ dependencies = [ name = "sedona-raster-gdal" version = "0.4.0" dependencies = [ - "arrow", "arrow-array", "arrow-buffer", "arrow-schema", @@ -6140,7 +6139,6 @@ dependencies = [ "sedona-expr", "sedona-gdal", "sedona-raster", - "sedona-raster-functions", "sedona-schema", "sedona-testing", "tempfile", diff --git a/rust/sedona-raster-gdal/Cargo.toml b/rust/sedona-raster-gdal/Cargo.toml index 78c51190c..5d7956f29 100644 --- a/rust/sedona-raster-gdal/Cargo.toml +++ b/rust/sedona-raster-gdal/Cargo.toml @@ -31,7 +31,6 @@ rust-version.workspace = true result_large_err = "allow" [dependencies] -arrow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } @@ -42,7 +41,6 @@ sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-gdal = { workspace = true } sedona-raster = { workspace = true } -sedona-raster-functions = { workspace = true } sedona-schema = { workspace = true } [dev-dependencies] diff --git a/rust/sedona-raster-gdal/src/lib.rs b/rust/sedona-raster-gdal/src/lib.rs index 0dd459184..360320b56 100644 --- a/rust/sedona-raster-gdal/src/lib.rs +++ b/rust/sedona-raster-gdal/src/lib.rs @@ -25,6 +25,8 @@ //! - GDAL datatype and nodata conversion helpers //! - path normalization for GDAL VSI-backed raster sources +pub mod register; + mod gdal_common; // Temporary until https://github.com/apache/sedona-db/issues/804 is resolved. #[allow(dead_code)] @@ -43,7 +45,3 @@ pub use gdal_common::{ }; pub use rs_frompath::rs_frompath_udf; pub use utils::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster}; - -pub fn all_gdal_udfs() -> Vec { - vec![rs_frompath_udf()] -} diff --git a/rust/sedona-raster-gdal/src/register.rs b/rust/sedona-raster-gdal/src/register.rs new file mode 100644 index 000000000..4db9cf56d --- /dev/null +++ b/rust/sedona-raster-gdal/src/register.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use sedona_expr::function_set::FunctionSet; + +/// Export the set of GDAL-backed functions defined in this crate. +pub fn default_function_set() -> FunctionSet { + let mut function_set = FunctionSet::new(); + function_set.insert_scalar_udf(crate::rs_frompath::rs_frompath_udf()); + function_set +} diff --git a/rust/sedona-raster-gdal/src/rs_frompath.rs b/rust/sedona-raster-gdal/src/rs_frompath.rs index 2fd5945e9..7ef995f30 100644 --- a/rust/sedona-raster-gdal/src/rs_frompath.rs +++ b/rust/sedona-raster-gdal/src/rs_frompath.rs @@ -24,6 +24,7 @@ use arrow_schema::DataType; use datafusion_common::cast::as_string_array; use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; +use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; @@ -56,34 +57,44 @@ impl SedonaScalarKernel for RsFromPath { _arg_types: &[SedonaType], args: &[ColumnarValue], _return_type: &SedonaType, - _num_rows: usize, + num_rows: usize, config_options: Option<&ConfigOptions>, ) -> Result { with_gdal(|gdal| { configure_thread_local_options(gdal, config_options)?; - let paths = args[0].cast_to(&DataType::Utf8, None)?.into_array(1)?; + let num_iterations = args + .iter() + .find_map(|arg| match arg { + ColumnarValue::Array(array) => Some(array.len()), + ColumnarValue::Scalar(_) => None, + }) + .unwrap_or_else(|| num_rows.max(1)); + + let paths = args[0] + .cast_to(&DataType::Utf8, None)? + .into_array_of_size(num_iterations)?; let path_array = as_string_array(&paths)?; - let len = path_array.len(); - let mut builder = RasterBuilder::new(len); - for i in 0..len { - if path_array.is_null(i) { - builder.append_null()?; - } else { - let path = path_array.value(i); + let mut builder = RasterBuilder::new(path_array.len()); + for path_opt in path_array { + if let Some(path) = path_opt { append_as_outdb_raster(gdal, path, &mut builder)?; + } else { + builder.append_null()?; } } let result: Arc = Arc::new(builder.finish()?); - match &args[0] { - ColumnarValue::Scalar(_) => { - let scalar = datafusion_common::ScalarValue::try_from_array(&result, 0)?; - Ok(ColumnarValue::Scalar(scalar)) - } - ColumnarValue::Array(_) => Ok(ColumnarValue::Array(result)), + match args + .iter() + .any(|arg| matches!(arg, ColumnarValue::Array(_))) + { + true => Ok(ColumnarValue::Array(result)), + false => Ok(ColumnarValue::Scalar(ScalarValue::try_from_array( + &result, 0, + )?)), } }) } @@ -102,7 +113,6 @@ mod tests { use super::*; use arrow_array::StringArray; use datafusion_common::cast::{as_struct_array, as_uint64_array}; - use datafusion_common::ScalarValue; use datafusion_expr::ScalarUDFImpl; use sedona_expr::scalar_udf::SedonaScalarKernel; use sedona_schema::raster::{metadata_indices, raster_indices}; diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs index cab44f530..5cf4d653a 100644 --- a/rust/sedona/src/context.rs +++ b/rust/sedona/src/context.rs @@ -233,9 +233,7 @@ impl SedonaContext { Arc::new(RandomGeometryFunction::default()), ); - for udf in sedona_raster_gdal::all_gdal_udfs() { - out.ctx.register_udf(udf.into()); - } + out.register_function_set(sedona_raster_gdal::register::default_function_set()); // Always register default function set out.register_function_set(sedona_functions::register::default_function_set()); From 2b03dd86f7745066ad3ff35bd26a96241a8770aa Mon Sep 17 00:00:00 2001 From: kontinuation Date: Sun, 31 May 2026 15:37:32 +0800 Subject: [PATCH 7/8] Fix compilation and tests after rebasing --- rust/sedona-raster-gdal/src/rs_frompath.rs | 73 +++++++----------- rust/sedona-raster-gdal/src/utils.rs | 89 ++++++++-------------- 2 files changed, 58 insertions(+), 104 deletions(-) diff --git a/rust/sedona-raster-gdal/src/rs_frompath.rs b/rust/sedona-raster-gdal/src/rs_frompath.rs index 7ef995f30..6517d269d 100644 --- a/rust/sedona-raster-gdal/src/rs_frompath.rs +++ b/rust/sedona-raster-gdal/src/rs_frompath.rs @@ -111,11 +111,11 @@ impl SedonaScalarKernel for RsFromPath { #[cfg(test)] mod tests { use super::*; - use arrow_array::StringArray; - use datafusion_common::cast::{as_struct_array, as_uint64_array}; + use arrow_array::{StringArray, StructArray}; + use datafusion_common::cast::as_struct_array; use datafusion_expr::ScalarUDFImpl; - use sedona_expr::scalar_udf::SedonaScalarKernel; - use sedona_schema::raster::{metadata_indices, raster_indices}; + use sedona_raster::array::RasterStructArray; + use sedona_raster::traits::RasterRef; use sedona_testing::data::test_raster; #[test] @@ -129,42 +129,29 @@ mod tests { width: u64, height: u64, ) { + fn assert_struct_array_dimensions( + struct_arr: &StructArray, + expected_len: usize, + width: u64, + height: u64, + ) { + let raster_array = RasterStructArray::new(struct_arr); + assert_eq!(raster_array.len(), expected_len); + + for idx in 0..expected_len { + let raster = raster_array.get(idx).unwrap(); + assert_eq!(raster.metadata().width(), width); + assert_eq!(raster.metadata().height(), height); + } + } + match result { ColumnarValue::Array(arr) => { let struct_arr = as_struct_array(arr).unwrap(); - assert_eq!(struct_arr.len(), expected_len); - - let metadata_struct = - as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); - for idx in 0..expected_len { - let actual_width = - as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(idx); - let actual_height = - as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(idx); - - assert_eq!(actual_width, width); - assert_eq!(actual_height, height); - } + assert_struct_array_dimensions(struct_arr, expected_len, width, height); } ColumnarValue::Scalar(ScalarValue::Struct(struct_arr)) => { - assert_eq!(struct_arr.len(), 1); - - let metadata_struct = - as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); - let actual_width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(0); - let actual_height = - as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(0); - - assert_eq!(actual_width, width); - assert_eq!(actual_height, height); + assert_struct_array_dimensions(struct_arr, expected_len, width, height); } other => panic!("Unexpected result: {other:?}"), } @@ -248,18 +235,10 @@ mod tests { assert!(!struct_arr.is_null(0)); assert!(struct_arr.is_null(1)); - let metadata_struct = - as_struct_array(struct_arr.column(raster_indices::METADATA)).unwrap(); - let actual_width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(0); - let actual_height = - as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(0); - - assert_eq!(actual_width, 10); - assert_eq!(actual_height, 10); + let raster_array = RasterStructArray::new(struct_arr); + let raster = raster_array.get(0).unwrap(); + assert_eq!(raster.metadata().width(), 10); + assert_eq!(raster.metadata().height(), 10); } other => panic!("Expected array result, got {other:?}"), } diff --git a/rust/sedona-raster-gdal/src/utils.rs b/rust/sedona-raster-gdal/src/utils.rs index 3232dad8d..65f193111 100644 --- a/rust/sedona-raster-gdal/src/utils.rs +++ b/rust/sedona-raster-gdal/src/utils.rs @@ -180,11 +180,7 @@ pub fn dataset_to_indb_raster(dataset: &Dataset) -> Result { mod tests { use super::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster}; - use arrow_array::{Array, StructArray}; - use datafusion_common::cast::{ - as_binary_array, as_list_array, as_string_array, as_string_view_array, as_struct_array, - as_uint32_array, as_uint64_array, - }; + use arrow_array::StructArray; use datafusion_common::exec_datafusion_err; use sedona_gdal::dataset::Dataset; use sedona_gdal::gdal::Gdal; @@ -194,10 +190,7 @@ mod tests { use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; use sedona_raster::traits::RasterRef; - use sedona_schema::raster::{ - band_indices, band_metadata_indices, metadata_indices, raster_indices, BandDataType, - StorageType, - }; + use sedona_schema::raster::{BandDataType, StorageType}; use sedona_testing::data::test_raster; use tempfile::TempDir; @@ -349,36 +342,20 @@ mod tests { let path = test_raster("test4.tiff").expect("test4.tiff should exist"); let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path)).unwrap(); - assert_eq!(raster.len(), 1); - - let metadata_struct = as_struct_array(raster.column(raster_indices::METADATA)).unwrap(); - let width = as_uint64_array(metadata_struct.column(metadata_indices::WIDTH)) - .unwrap() - .value(0); - let height = as_uint64_array(metadata_struct.column(metadata_indices::HEIGHT)) - .unwrap() - .value(0); - - assert_eq!(width, 10); - assert_eq!(height, 10); - - let crs = as_string_view_array(raster.column(raster_indices::CRS)).unwrap(); - assert!(!crs.is_null(0)); - - let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); - let bands_struct = as_struct_array(bands_list.values()).unwrap(); - let band_metadata_struct = - as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); - - let outdb_url = - as_string_array(band_metadata_struct.column(band_metadata_indices::OUTDB_URL)).unwrap(); - assert!(!outdb_url.is_null(0)); - assert!(outdb_url.value(0).contains("test4.tiff")); - - let storage_type = - as_uint32_array(band_metadata_struct.column(band_metadata_indices::STORAGE_TYPE)) - .unwrap(); - assert_eq!(storage_type.value(0), StorageType::OutDbRef as u32); + let raster_struct = RasterStructArray::new(&raster); + assert_eq!(raster_struct.len(), 1); + + let raster = raster_struct.get(0).unwrap(); + assert_eq!(raster.metadata().width(), 10); + assert_eq!(raster.metadata().height(), 10); + assert!(raster.crs().is_some()); + + let band = raster.bands().band(1).unwrap(); + assert_eq!( + band.metadata().storage_type().unwrap(), + StorageType::OutDbRef + ); + assert!(band.metadata().outdb_url().unwrap().contains("test4.tiff")); } #[test] @@ -395,15 +372,14 @@ mod tests { .unwrap(); let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path_str)).unwrap(); - let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); - let bands_struct = as_struct_array(bands_list.values()).unwrap(); - let band_metadata_struct = - as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); - let nodata_values = - as_binary_array(band_metadata_struct.column(band_metadata_indices::NODATAVALUE)) - .unwrap(); - - assert_eq!(nodata_values.value(0), nodata.to_le_bytes()); + let raster_struct = RasterStructArray::new(&raster); + let raster = raster_struct.get(0).unwrap(); + let band = raster.bands().band(1).unwrap(); + + assert_eq!( + band.metadata().nodata_value().unwrap(), + nodata.to_le_bytes() + ); } #[test] @@ -420,15 +396,14 @@ mod tests { .unwrap(); let raster = with_gdal(|gdal| load_as_outdb_raster(gdal, &path_str)).unwrap(); - let bands_list = as_list_array(raster.column(raster_indices::BANDS)).unwrap(); - let bands_struct = as_struct_array(bands_list.values()).unwrap(); - let band_metadata_struct = - as_struct_array(bands_struct.column(band_indices::METADATA)).unwrap(); - let nodata_values = - as_binary_array(band_metadata_struct.column(band_metadata_indices::NODATAVALUE)) - .unwrap(); - - assert_eq!(nodata_values.value(0), nodata.to_le_bytes()); + let raster_struct = RasterStructArray::new(&raster); + let raster = raster_struct.get(0).unwrap(); + let band = raster.bands().band(1).unwrap(); + + assert_eq!( + band.metadata().nodata_value().unwrap(), + nodata.to_le_bytes() + ); } #[test] From 57edaa8bf2f796e1394d4aafe1de0511c5a83dd5 Mon Sep 17 00:00:00 2001 From: kontinuation Date: Sun, 31 May 2026 19:00:54 +0800 Subject: [PATCH 8/8] fix(rust/sedona-raster-gdal): use executor shape handling in RS_FromPath --- Cargo.lock | 1 + rust/sedona-raster-gdal/Cargo.toml | 1 + rust/sedona-raster-gdal/src/rs_frompath.rs | 47 ++++++++++++---------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6cd6c8129..2331e62ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6137,6 +6137,7 @@ dependencies = [ "lru 0.18.0", "sedona-common", "sedona-expr", + "sedona-functions", "sedona-gdal", "sedona-raster", "sedona-schema", diff --git a/rust/sedona-raster-gdal/Cargo.toml b/rust/sedona-raster-gdal/Cargo.toml index 5d7956f29..d0fe9b9f7 100644 --- a/rust/sedona-raster-gdal/Cargo.toml +++ b/rust/sedona-raster-gdal/Cargo.toml @@ -39,6 +39,7 @@ datafusion-expr = { workspace = true } lru = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } +sedona-functions = { workspace = true } sedona-gdal = { workspace = true } sedona-raster = { workspace = true } sedona-schema = { workspace = true } diff --git a/rust/sedona-raster-gdal/src/rs_frompath.rs b/rust/sedona-raster-gdal/src/rs_frompath.rs index 6517d269d..1bf0f3f19 100644 --- a/rust/sedona-raster-gdal/src/rs_frompath.rs +++ b/rust/sedona-raster-gdal/src/rs_frompath.rs @@ -24,10 +24,10 @@ use arrow_schema::DataType; use datafusion_common::cast::as_string_array; use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; -use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_functions::executor::WkbBytesExecutor; use sedona_raster::builder::RasterBuilder; use sedona_schema::datatypes::{SedonaType, RASTER}; use sedona_schema::matchers::ArgMatcher; @@ -54,26 +54,19 @@ impl SedonaScalarKernel for RsFromPath { fn invoke_batch_from_args( &self, - _arg_types: &[SedonaType], + arg_types: &[SedonaType], args: &[ColumnarValue], _return_type: &SedonaType, - num_rows: usize, + _num_rows: usize, config_options: Option<&ConfigOptions>, ) -> Result { with_gdal(|gdal| { configure_thread_local_options(gdal, config_options)?; - - let num_iterations = args - .iter() - .find_map(|arg| match arg { - ColumnarValue::Array(array) => Some(array.len()), - ColumnarValue::Scalar(_) => None, - }) - .unwrap_or_else(|| num_rows.max(1)); + let executor = WkbBytesExecutor::new(arg_types, args); let paths = args[0] .cast_to(&DataType::Utf8, None)? - .into_array_of_size(num_iterations)?; + .into_array_of_size(executor.num_iterations())?; let path_array = as_string_array(&paths)?; let mut builder = RasterBuilder::new(path_array.len()); @@ -86,16 +79,7 @@ impl SedonaScalarKernel for RsFromPath { } let result: Arc = Arc::new(builder.finish()?); - - match args - .iter() - .any(|arg| matches!(arg, ColumnarValue::Array(_))) - { - true => Ok(ColumnarValue::Array(result)), - false => Ok(ColumnarValue::Scalar(ScalarValue::try_from_array( - &result, 0, - )?)), - } + executor.finish(result) }) } @@ -113,6 +97,7 @@ mod tests { use super::*; use arrow_array::{StringArray, StructArray}; use datafusion_common::cast::as_struct_array; + use datafusion_common::ScalarValue; use datafusion_expr::ScalarUDFImpl; use sedona_raster::array::RasterStructArray; use sedona_raster::traits::RasterRef; @@ -259,4 +244,22 @@ mod tests { missing_path, missing_path ))); } + + #[test] + fn test_invoke_rs_from_path_scalar_ignores_num_rows_for_shape() { + let path = test_raster("test4.tiff").expect("test4.tiff should exist"); + + let result = RsFromPath + .invoke_batch_from_args( + &[], + &[ColumnarValue::Scalar(ScalarValue::Utf8(Some(path)))], + &SedonaType::Arrow(DataType::Null), + 32, + None, + ) + .expect("Should invoke successfully for scalar path with larger num_rows"); + + assert!(matches!(result, ColumnarValue::Scalar(_))); + assert_raster_dimensions(&result, 1, 10, 10); + } }