Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c/sedona-geos/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ mod st_simplifypreservetopology;
mod st_snap;
mod st_unaryunion;
pub mod wkb_to_geos;
mod st_hausdorffdistance;
1 change: 1 addition & 0 deletions c/sedona-geos/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, Vec<ScalarKernelRef>)> {
"st_isvalidreason" => crate::st_isvalidreason::st_is_valid_reason_impl,
"st_length" => crate::st_length::st_length_impl,
"st_linemerge" => crate::st_line_merge::st_line_merge_impl,
"st_hausdorffdistance" => crate::st_hausdorffdistance::st_hausdorff_distance_impl,
"st_makevalid" => crate::st_makevalid::st_make_valid_impl,
"st_minimumclearance" => crate::st_minimumclearance::st_minimum_clearance_impl,
"st_minimumclearanceline" => crate::st_minimumclearance_line::st_minimum_clearance_line_impl,
Expand Down
86 changes: 86 additions & 0 deletions c/sedona-geos/src/st_hausdorffdistance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use arrow_array::builder::Float64Builder;
use arrow_schema::DataType;
use datafusion_common::{error::Result, DataFusionError};
use datafusion_expr::{ColumnarValue, Volatility};

use ::geos::Geom;

use sedona_expr::{
item_crs::ItemCrsKernel,
scalar_udf::{SedonaScalarKernel, SedonaScalarUDF},
};
use sedona_schema::{
datatypes::{SedonaType, WKB_GEOMETRY},
matchers::ArgMatcher,
};
use std::sync::Arc;
use sedona_functions::executor::WkbExecutor;
use crate::executor::GeosExecutor;

/// Builds the scalar UDF for `ST_HausdorffDistance`, backed by the GEOS kernel
/// [`STHausdorffDistance`] wrapped with [`ItemCrsKernel::wrap_impl`].
///
/// The UDF name is `st_hausdorffdistance` so that it agrees with the key this
/// function is registered under in `register.rs` and with the SQL spelling
/// `ST_HausdorffDistance` used by the docs and tests.
// NOTE(review): the registration table is typed `Vec<(&'static str, Vec<ScalarKernelRef>)>`;
// confirm that returning a `SedonaScalarUDF` (rather than the kernel list) is what it expects.
pub fn st_hausdorff_distance_impl() -> SedonaScalarUDF {
    SedonaScalarUDF::new(
        "st_hausdorffdistance",
        ItemCrsKernel::wrap_impl(vec![Arc::new(STHausdorffDistance)]),
        Volatility::Immutable,
    )
}

/// Scalar kernel that computes the Hausdorff distance between two geometry
/// arguments using GEOS.
#[derive(Debug)]
struct STHausdorffDistance;

impl SedonaScalarKernel for STHausdorffDistance {
/// Resolves the kernel's output type for the given argument types.
///
/// The signature is declared as two geometry arguments producing an Arrow
/// `Float64`; the result of matching `args` against it is returned as-is.
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
    let geometry_pair = vec![ArgMatcher::is_geometry(), ArgMatcher::is_geometry()];
    let output_type = SedonaType::Arrow(DataType::Float64);
    ArgMatcher::new(geometry_pair, output_type).match_args(args)
}

fn invoke_batch(
&self,
arg_types: &[SedonaType],
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
let mut executor = GeosExecutor::new(arg_types, args);
let mut builder = Float64Builder::with_capacity(executor.num_iterations());


executor.execute_wkb_void(|maybe_g1, maybe_g2| {
match (maybe_g1, maybe_g2) {
(Some(wkb1), Some(wkb2)) => {

let g1 = ::geos::Geometry::try_from(wkb1)
.map_err(|e| DataFusionError::Execution(format!("GEOS conversion error: {}", e)))?;
let g2 = ::geos::Geometry::try_from(wkb2)
.map_err(|e| DataFusionError::Execution(format!("GEOS conversion error: {}", e)))?;

let dist = g1.hausdorff_distance(&g2)
.map_err(|e| DataFusionError::Execution(format!("ST_HausdorffDistance error: {}", e)))?;

builder.append_value(dist);
}
_ => builder.append_null(),
}
Ok(())
})?;

executor.finish(Arc::new(builder.finish()))
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add Rust tests (you can use the tests for st_distance as a template)? The Rust tests check for basic functionality and ensure that scalar and array arguments work everywhere.

}
38 changes: 38 additions & 0 deletions docs/reference/sql/st_hausdorffdistance.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
---
title: ST_HausdorffDistance
description: Returns the Hausdorff distance between two geometries. This is a measure of how similar two geometries are.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
description: Returns the Hausdorff distance between two geometries. This is a measure of how similar two geometries are.
description: Returns the Hausdorff distance between two geometries.

kernels:
- returns: double
args:
- geom1: geometry
- geom2: geometry
- returns: double
args:
- geom1: geometry
- geom2: geometry
- density_frac: double
Comment on lines +21 to +29
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have funny hand-rolled syntax for these, but this should work:

Suggested change
- returns: double
args:
- geom1: geometry
- geom2: geometry
- returns: double
args:
- geom1: geometry
- geom2: geometry
- density_frac: double
- returns: double
args:
- geometry
- geometry
- returns: double
args:
- geometry
- geometry
- name: density_frac
type: float64
description: Densification to apply before calculation

---

## Examples

```sql
SELECT ST_HausdorffDistance(
ST_GeomFromWKT('LINESTRING (0 0, 2 0)'),
ST_GeomFromWKT('LINESTRING (0 1, 1 2, 2 1)')
);
49 changes: 49 additions & 0 deletions python/sedonadb/tests/functions/st_hausdorffdistance.py
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be added next to wherever st_distance is tested (I'm not sure it needs its own file)

Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
from sedonadb.testing import geom_or_null, PostGIS, SedonaDB


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom1", "geom2", "density_frac", "expected"),
[
(None, None, None, None),
("POINT (0 0)", None, None, None),
Comment on lines +26 to +27
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add (None, "POINT (0 1)", None, ...), ("POINT (0 1)", "POINT (0 1)", None), and checks for EMPTY behaviour (empty linestring + non-empty linestring and reversed)?

("LINESTRING (0 0, 2 0)", "LINESTRING (0 1, 1 2, 2 1)", None, 1.4142135623730951),
# Case with density fraction
("LINESTRING (0 0, 100 0)", "LINESTRING (0 0, 50 1, 100 0)", 0.5, 1.0),
# Identical geometries
("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", None, 0.0),
# Disjoint geometries
("POINT (0 0)", "POINT (10 10)", None, 14.142135623730951),
],
)
def test_st_hausdorff_distance(eng, geom1, geom2, density_frac, expected):
eng = eng.create_or_skip()

if density_frac is None:
query = f"SELECT ST_HausdorffDistance({geom_or_null(geom1)}, {geom_or_null(geom2)})"
else:
query = f"SELECT ST_HausdorffDistance({geom_or_null(geom1)}, {geom_or_null(geom2)}, {density_frac})"

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you separate the tests for the version with and without density_frac (i.e., def test_st_hausdorff_distance_density_frac())? This will let us separate the "null" from "not specified" case.

eng.assert_query_result(
query,
expected,
numeric_epsilon=1e-8,
)
Loading