diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index d05367d0d..05719a048 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -50,3 +50,4 @@ mod st_simplifypreservetopology;
 mod st_snap;
 mod st_unaryunion;
 pub mod wkb_to_geos;
+mod st_hausdorffdistance;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index 1b9583f4d..28b065cf3 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -64,6 +64,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, Vec<ScalarKernelRef>)> {
         "st_isvalidreason" => crate::st_isvalidreason::st_is_valid_reason_impl,
         "st_length" => crate::st_length::st_length_impl,
         "st_linemerge" => crate::st_line_merge::st_line_merge_impl,
+        "st_hausdorffdistance" => crate::st_hausdorffdistance::st_hausdorff_distance_impl,
         "st_makevalid" => crate::st_makevalid::st_make_valid_impl,
         "st_minimumclearance" => crate::st_minimumclearance::st_minimum_clearance_impl,
         "st_minimumclearanceline" => crate::st_minimumclearance_line::st_minimum_clearance_line_impl,
diff --git a/c/sedona-geos/src/st_hausdorffdistance.rs b/c/sedona-geos/src/st_hausdorffdistance.rs
new file mode 100644
index 000000000..c281c6905
--- /dev/null
+++ b/c/sedona-geos/src/st_hausdorffdistance.rs
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::sync::Arc;
+
+use arrow_array::builder::Float64Builder;
+use arrow_schema::DataType;
+use datafusion_common::{error::Result, DataFusionError};
+use datafusion_expr::{ColumnarValue, Volatility};
+
+use ::geos::Geom;
+
+use sedona_expr::{
+    item_crs::ItemCrsKernel,
+    scalar_udf::{SedonaScalarKernel, SedonaScalarUDF},
+};
+// NOTE(review): `WkbExecutor` and `WKB_GEOMETRY` are not referenced below;
+// drop them if nothing else in this file ends up relying on these imports.
+use sedona_functions::executor::WkbExecutor;
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+
+use crate::executor::GeosExecutor;
+
+/// ST_HausdorffDistance() implementation using the geos crate.
+///
+/// Returns the UDF for the two-geometry form of the function. The UDF is
+/// named `st_hausdorffdistance` so that it agrees with the key under which
+/// it is listed in `register.rs`.
+///
+/// TODO: the SQL reference page also lists a three-argument form taking a
+/// `density_frac`; no kernel in this file accepts that signature yet.
+pub fn st_hausdorff_distance_impl() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "st_hausdorffdistance",
+        ItemCrsKernel::wrap_impl(vec![Arc::new(STHausdorffDistance)]),
+        Volatility::Immutable,
+    )
+}
+
+/// Scalar kernel computing the Hausdorff distance between two geometries.
+#[derive(Debug)]
+struct STHausdorffDistance;
+
+impl SedonaScalarKernel for STHausdorffDistance {
+    /// Accepts two geometry arguments and produces a `Float64` result.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry(), ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::Float64),
+        );
+
+        matcher.match_args(args)
+    }
+
+    /// Computes one distance per row; a row is null when either input is null.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DataFusionError::Execution`] when GEOS fails to compute the
+    /// distance for a row.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = Float64Builder::with_capacity(executor.num_iterations());
+
+        // NOTE(review): `GeosExecutor` is expected to hand the closure parsed
+        // GEOS geometries (confirm against `crate::executor`), so they are
+        // used directly instead of being converted a second time.
+        executor.execute_wkb_wkb_void(|maybe_lhs, maybe_rhs| {
+            match (maybe_lhs, maybe_rhs) {
+                (Some(lhs), Some(rhs)) => {
+                    let distance = lhs.hausdorff_distance(rhs).map_err(|e| {
+                        DataFusionError::Execution(format!("ST_HausdorffDistance error: {e}"))
+                    })?;
+                    builder.append_value(distance);
+                }
+                _ => builder.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
diff --git a/docs/reference/sql/st_hausdorffdistance.qmd b/docs/reference/sql/st_hausdorffdistance.qmd
new file mode 100644
index 000000000..d1496f452
--- /dev/null
+++ b/docs/reference/sql/st_hausdorffdistance.qmd
@@ -0,0 +1,38 @@
+---
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+title: ST_HausdorffDistance
+description: Returns the Hausdorff distance between two geometries. This is a measure of how similar two geometries are.
+kernels:
+  - returns: double
+    args:
+      - geom1: geometry
+      - geom2: geometry
+  - returns: double
+    args:
+      - geom1: geometry
+      - geom2: geometry
+      - density_frac: double
+---
+
+## Examples
+
+```sql
+SELECT ST_HausdorffDistance(
+    ST_GeomFromWKT('LINESTRING (0 0, 2 0)'),
+    ST_GeomFromWKT('LINESTRING (0 1, 1 2, 2 1)'));
+```
diff --git a/python/sedonadb/tests/functions/test_st_hausdorffdistance.py b/python/sedonadb/tests/functions/test_st_hausdorffdistance.py
new file mode 100644
index 000000000..50026af7d
--- /dev/null
+++ b/python/sedonadb/tests/functions/test_st_hausdorffdistance.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+from sedonadb.testing import geom_or_null, PostGIS, SedonaDB
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom1", "geom2", "density_frac", "expected"),
+    [
+        (None, None, None, None),
+        ("POINT (0 0)", None, None, None),
+        # Vertex (1 2) of the second line is 2 units from the first line.
+        ("LINESTRING (0 0, 2 0)", "LINESTRING (0 1, 1 2, 2 1)", None, 2.0),
+        # Case with density fraction
+        ("LINESTRING (0 0, 100 0)", "LINESTRING (0 0, 50 1, 100 0)", 0.5, 1.0),
+        # Identical geometries
+        (
+            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+            "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+            None,
+            0.0,
+        ),
+        # Disjoint geometries
+        ("POINT (0 0)", "POINT (10 10)", None, 14.142135623730951),
+    ],
+)
+def test_st_hausdorff_distance(eng, geom1, geom2, density_frac, expected):
+    """Check ST_HausdorffDistance with and without a densify fraction."""
+    if density_frac is not None and eng is SedonaDB:
+        pytest.skip(
+            "SedonaDB does not yet implement ST_HausdorffDistance with density_frac"
+        )
+
+    eng = eng.create_or_skip()
+
+    sql_args = [geom_or_null(geom1), geom_or_null(geom2)]
+    if density_frac is not None:
+        sql_args.append(str(density_frac))
+
+    eng.assert_query_result(
+        f"SELECT ST_HausdorffDistance({', '.join(sql_args)})",
+        expected,
+        numeric_epsilon=1e-8,
+    )