Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions ocrd_segment/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from scipy.ndimage import filters, morphology
import cv2
import numpy as np
from shapely.geometry import Polygon, LineString
from shapely.geometry import asPolygon, Polygon, LineString

from ocrd import Processor
from ocrd_utils import (
Expand Down Expand Up @@ -240,11 +240,13 @@ def sanitize_page(self, page, page_id):
LOG.warning('Ignoring contour %d too small (%d/%d) in region "%s"',
i, area, total_area, region.id)
continue
# simplify shape:
# simplify shape (until valid):
# can produce invalid (self-intersecting) polygons:
#polygon = cv2.approxPolyDP(contour, 2, False)[:, 0, ::] # already ordered x,y
polygon = contour[:, 0, ::] # already ordered x,y
polygon = Polygon(polygon).simplify(1).exterior.coords
polygon = Polygon(polygon).simplify(1)
polygon = make_valid(polygon)
polygon = polygon.exterior.coords[:-1] # keep open
if len(polygon) < 4:
LOG.warning('Ignoring contour %d less than 4 points in region "%s"',
i, region.id)
Expand Down Expand Up @@ -354,7 +356,13 @@ def _plausibilize_group(regionspolys, rogroup, mark_for_deletion, mark_for_mergi
# and use-cases in the future
superpoly = Polygon(polygon_from_points(superreg.get_Coords().points))
superpoly = superpoly.union(poly)
superreg.get_Coords().points = points_from_polygon(superpoly.exterior.coords)
if superpoly.type == 'MultiPolygon':
superpoly = superpoly.convex_hull
if superpoly.minimum_clearance < 1.0:
superpoly = asPolygon(np.round(superpoly.exterior.coords))
superpoly = make_valid(superpoly)
superpoly = superpoly.exterior.coords[:-1] # keep open
superreg.get_Coords().points = points_from_polygon(superpoly)
# FIXME should we merge/mix attributes and features?
if region.get_orientation() != superreg.get_orientation():
LOG.warning('Merging region "%s" with orientation %f into "%s" with %f',
Expand Down Expand Up @@ -399,3 +407,18 @@ def _plausibilize_group(regionspolys, rogroup, mark_for_deletion, mark_for_mergi
if region.parent_object_:
# remove in-place
region.parent_object_.get_TextRegion().remove(region)

def make_valid(polygon):
    """Ensure a shapely.geometry.Polygon is valid by re-ordering and simplification.

    Two strategies are applied in sequence, each stopping as soon as the
    polygon reports ``is_valid``:

    1. Rotate the starting point of the exterior ring. Simplification
       depends on where the ring starts, so a different starting point may
       make a later (or the probing) simplification succeed.
    2. Simplify with integer tolerances growing from 1 upwards.

    Args:
        polygon: a ``shapely.geometry.Polygon``. Only its exterior ring is
            used when the ring is re-ordered (interior rings are dropped
            in that case).

    Returns:
        A ``shapely.geometry.Polygon``; the input object itself if it was
        already valid. NOTE(review): if no tried tolerance yields a valid
        shape, the last (still invalid) candidate is returned -- callers
        that require validity should check ``is_valid`` themselves.
    """
    # Work on the open ring (without the repeated closing point), and always
    # rotate the *original* vertex list: slicing the closed ring of the
    # current candidate would inject a duplicate vertex on every attempt and
    # make the rotation cumulative, skipping some starting points.
    points = list(polygon.exterior.coords)[:-1]
    for split in range(1, len(points)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering
        # in that case, try another starting point
        polygon = Polygon(points[-split:] + points[:-split])
    # Always try at least tolerance 1: with an upper bound of int(area),
    # polygons with a computed area below 2 would never be simplified
    # (presumably common for small self-intersecting shapes -- TODO confirm).
    for tolerance in range(1, int(polygon.area + 1.5) + 1):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance)
    return polygon
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ocrd >= 2.13.1
Comment thread
bertsky marked this conversation as resolved.
Outdated
shapely
shapely >= 1.7.1
scikit-image
numpy