Skip to content

Commit 9c30e16

Browse files
committed
Simplify by sorting and grouping *once*
There were several places in the flow where we were doing similar group_by/order_by. Because GeoFloat isn't Ord, we introduce a total_ord wrapper Also some renames for clarity. "edge" == "ring"
1 parent e6a561c commit 9c30e16

File tree

1 file changed

+112
-182
lines changed

1 file changed

+112
-182
lines changed

geo/src/algorithm/validation/polygon.rs

Lines changed: 112 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ use super::{CoordIndex, RingRole, Validation, utils};
22
use crate::coordinate_position::CoordPos;
33
use crate::dimensions::Dimensions;
44
use crate::relate::geomgraph::GeometryGraph;
5-
use crate::{Coord, GeoFloat, HasDimensions, Polygon, PreparedGeometry, Relate};
5+
use crate::{GeoFloat, HasDimensions, Polygon, PreparedGeometry, Relate};
6+
7+
use total_ord_coord::TotalOrdCoord;
8+
use union_find::UnionFind;
69

710
use std::cell::RefCell;
8-
use std::cmp::Ordering;
9-
use std::collections::{BTreeSet, HashSet};
11+
use std::collections::{BTreeMap, BTreeSet};
1012
use std::fmt;
1113

1214
/// A [`Polygon`] must follow these rules to be valid:
@@ -231,215 +233,143 @@ fn edge_index_to_ring_role(edge_idx: usize) -> RingRole {
231233
fn check_interior_simply_connected_from_graph<F: GeoFloat>(
232234
graph: &GeometryGraph<F>,
233235
) -> Option<(usize, usize)> {
234-
let edges = graph.edges();
235-
if edges.len() < 2 {
236+
let rings = graph.edges();
237+
if rings.len() < 2 {
236238
return None;
237239
}
238240

239-
// Collect all intersection points with their edge index and vertex status.
240-
struct IntersectionInfo<F: GeoFloat> {
241-
coord: Coord<F>,
242-
ring_idx: usize,
243-
is_vertex: bool,
244-
}
245-
246-
let mut all_intersections: Vec<IntersectionInfo<F>> = Vec::new();
247-
for (ring_idx, ring_edge) in edges.iter().enumerate() {
241+
// Group intersections by coordinate
242+
// Coord => (
243+
// 0: All the rings which intersect at this coordinate
244+
// 1: If at least one of these intersecting rings has this coordinate as a vertex
245+
// )
246+
let mut all_intersections: BTreeMap<TotalOrdCoord<F>, (BTreeSet<usize>, bool)> =
247+
BTreeMap::new();
248+
for (ring_idx, ring_edge) in rings.iter().enumerate() {
248249
let ring_edge = RefCell::borrow(ring_edge);
249250
let coords = ring_edge.coords();
250251
for ei in ring_edge.edge_intersections() {
251252
let coord = ei.coordinate();
252253
let is_vertex = coords.contains(&coord);
253-
all_intersections.push(IntersectionInfo {
254-
coord,
255-
ring_idx,
256-
is_vertex,
257-
});
258-
}
259-
}
260-
261-
// Sort by (coordinate, edge_idx) using total_cmp for consistent ordering.
262-
all_intersections.sort_by(|a, b| {
263-
a.coord
264-
.x
265-
.total_cmp(&b.coord.x)
266-
.then_with(|| a.coord.y.total_cmp(&b.coord.y))
267-
.then_with(|| a.ring_idx.cmp(&b.ring_idx))
268-
});
269-
270-
// Group all intersections sharing the same coordinate and build:
271-
// - coord_edges: (coord, edge_a, edge_b) for each touch, used by cycle detection
272-
// - ring_pair_seen: if a pair is seen a second time, it shares 2+ touch
273-
// points at different coordinates, which always disconnects the interior
274-
let mut coord_edges: Vec<(Coord<F>, usize, usize)> = Vec::new();
275-
let mut ring_pair_seen: HashSet<(usize, usize)> = HashSet::new();
276-
let mut intersecting_rings: BTreeSet<usize> = BTreeSet::new();
277-
let mut i = 0;
278-
while i < all_intersections.len() {
279-
let current_coord = all_intersections[i].coord;
280-
281-
// Find the range of intersections sharing this coordinate.
282-
let mut j = i + 1;
283-
while j < all_intersections.len() && all_intersections[j].coord == current_coord {
284-
j += 1;
254+
let (intersecting_rings, any_is_vertex) = all_intersections
255+
.entry(TotalOrdCoord(coord))
256+
.or_insert((BTreeSet::new(), false));
257+
intersecting_rings.insert(ring_idx);
258+
*any_is_vertex |= is_vertex;
285259
}
286-
let all_intersections_at_coord = &all_intersections[i..j];
287-
288-
// Deduplicate edges within the group. The same edge can appear multiple
289-
// times with different vertex flags, so we merge them to ensure we only
290-
// count distinct edges and know if any of them is a vertex.
291-
intersecting_rings.clear();
292-
let mut intersection_is_on_a_vertex = false;
293-
for intersection in all_intersections_at_coord {
294-
intersection_is_on_a_vertex |= intersection.is_vertex;
295-
intersecting_rings.insert(intersection.ring_idx);
296-
}
297-
298-
// A "touch" requires 2+ distinct edges, at least one having the point
299-
// as a vertex.
300-
if intersection_is_on_a_vertex {
301-
for (idx_a, edge_a) in intersecting_rings.iter().enumerate() {
302-
for edge_b in intersecting_rings.iter().skip(idx_a + 1) {
303-
debug_assert!(edge_a < edge_b);
304-
let key = (*edge_a, *edge_b);
305-
if !ring_pair_seen.insert(key) {
306-
return Some(key);
307-
}
308-
coord_edges.push((current_coord, *edge_a, *edge_b));
309-
}
310-
}
311-
}
312-
313-
i = j;
314260
}
315261

316-
check_ring_touches_disconnect_interior(&mut coord_edges, edges.len())
317-
}
318-
319-
/// Detect cycles through distinct coordinates in the ring touch graph.
320-
///
321-
/// Each entry in `coord_edges` is `(coord, ring_a, ring_b)` representing a
322-
/// single-touch between two rings at the given coordinate. The caller has
323-
/// already handled the case where any pair shares 2+ touch points.
324-
///
325-
/// A cycle of ring touches only disconnects the interior if the cycle passes
326-
/// through at least two distinct coordinates (enclosing area). A cycle where
327-
/// every edge shares the same coordinate — e.g. three holes all meeting at
328-
/// one point — does not enclose any area and is harmless.
329-
///
330-
/// To distinguish these cases, edges are sorted and grouped by coordinate,
331-
/// then processed with two Union-Find structures:
332-
///
333-
/// - **Global UF**: accumulates connectivity across all coordinate groups
334-
/// processed so far. "Are these two rings connected by ANY path of touches?"
335-
/// - **Local UF**: reset for each coordinate group. "Are these two rings
336-
/// connected through touches at THIS coordinate only?"
337-
///
338-
/// For each edge `(u, v)` in the current coordinate group:
339-
/// - `!global.connected(u, v)`: first connection between these components — no
340-
/// cycle at all. **Safe.**
341-
/// - `global.connected(u, v) && local.connected(u, v)`: the rings are already
342-
/// connected within this coordinate group — a same-coordinate cycle. **Safe.**
343-
/// - `global.connected(u, v) && !local.connected(u, v)`: the rings are
344-
/// reachable through a previously processed (different) coordinate, but not
345-
/// through this one — closing a multi-coordinate cycle. **Invalid.**
346-
///
347-
/// # Params
348-
///
349-
/// `coord_edges`: Coord and the two rings touching at that coord
350-
/// `num_rings`: Number of rings in the Polygon
351-
///
352-
/// # Returns
353-
///
354-
/// The rings which disconnect the interior, or None if the interior is connected.
355-
fn check_ring_touches_disconnect_interior<F: GeoFloat>(
356-
coord_edges: &mut [(Coord<F>, usize, usize)],
357-
num_rings: usize,
358-
) -> Option<(usize, usize)> {
359-
coord_edges.sort_by(|a, b| {
360-
a.0.x
361-
.total_cmp(&b.0.x)
362-
.then_with(|| a.0.y.total_cmp(&b.0.y))
363-
});
364-
365262
// Which rings are connected, even if vicariously, through other rings.
366-
let mut global = UnionFind::new(num_rings);
367-
let mut group_start = 0;
368-
while group_start < coord_edges.len() {
369-
let mut group_end = group_start + 1;
370-
while group_end < coord_edges.len()
371-
&& coord_edges[group_end].0 == coord_edges[group_start].0
372-
{
373-
group_end += 1;
263+
let mut global = UnionFind::new(rings.len());
264+
for (_intersection_coord, (intersecting_rings, intersection_is_on_a_vertex)) in
265+
all_intersections
266+
{
267+
if !intersection_is_on_a_vertex {
268+
continue;
374269
}
375-
376-
// Which rings are touching at *this* coordinate
377-
let mut local = UnionFind::new(num_rings);
378-
for &(_, u, v) in &coord_edges[group_start..group_end] {
379-
if global.find_root(u) == global.find_root(v)
380-
&& local.find_root(u) != local.find_root(v)
270+
// Which rings are connected at *this* coordinate
271+
let mut local = UnionFind::new(rings.len());
272+
273+
let mut intersecting_rings = intersecting_rings.into_iter();
274+
let Some(first) = intersecting_rings.next() else {
275+
continue;
276+
};
277+
for next in intersecting_rings {
278+
if global.find_root(first) == global.find_root(next)
279+
&& local.find_root(first) != local.find_root(next)
381280
{
382-
return Some((u.min(v), u.max(v)));
281+
// These rings have touched before, we have a disconnected interior.
282+
return Some((first.min(next), first.max(next)));
283+
} else {
284+
global.union_sets(first, next);
285+
local.union_sets(first, next);
383286
}
384-
global.union_sets(u, v);
385-
local.union_sets(u, v);
386287
}
387-
388-
group_start = group_end;
389288
}
390-
391289
None
392290
}
393291

394-
/// Union Find is a classic algorithm for managing disjoint sets - i.e. which rings are touching.
395-
///
396-
/// Includes Path Compression and Rank counting optimizations.
397-
struct UnionFind {
398-
parent: Vec<usize>,
399-
rank: Vec<usize>,
400-
}
292+
mod total_ord_coord {
293+
use crate::{Coord, GeoFloat};
294+
use std::cmp::Ordering;
295+
296+
#[derive(Debug, Clone, Copy)]
297+
pub(super) struct TotalOrdCoord<F: GeoFloat>(pub Coord<F>);
401298

402-
impl UnionFind {
403-
fn new(n: usize) -> Self {
404-
Self {
405-
parent: (0..n).collect(),
406-
rank: vec![0; n],
299+
impl<F: GeoFloat> PartialEq<Self> for TotalOrdCoord<F> {
300+
fn eq(&self, other: &Self) -> bool {
301+
self.cmp(other) == Ordering::Equal
407302
}
408303
}
304+
impl<F: GeoFloat> Eq for TotalOrdCoord<F> {}
305+
impl<F: GeoFloat> PartialOrd for TotalOrdCoord<F> {
306+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
307+
Some(self.cmp(other))
308+
}
309+
}
310+
impl<F: GeoFloat> Ord for TotalOrdCoord<F> {
311+
fn cmp(&self, other: &Self) -> Ordering {
312+
self.0
313+
.x
314+
.total_cmp(&other.0.x)
315+
.then_with(|| self.0.y.total_cmp(&other.0.y))
316+
}
317+
}
318+
}
319+
320+
mod union_find {
321+
use std::cmp::Ordering;
409322

410-
/// Which set does `x` belong to? (identified by the set parent).
323+
/// Union Find is a classic algorithm for managing disjoint sets - i.e. which rings are touching.
411324
///
412-
/// Rings that are touching will have the same root.
413-
/// Once all touching rings have been unioned, rings with different roots are not touching.
414-
fn find_root(&mut self, x: usize) -> usize {
415-
let mut parent = x;
416-
// A root node is its own parent
417-
while parent != self.parent[x] {
418-
parent = self.parent[self.parent[x]];
419-
// compress path to make `find_root` faster next time
420-
self.parent[x] = parent;
421-
}
422-
parent
325+
/// Includes Path Compression and Rank counting optimizations.
326+
pub(super) struct UnionFind {
327+
parent: Vec<usize>,
328+
rank: Vec<usize>,
423329
}
424330

425-
/// If two rings are touching, union their two "touchable" sets.
426-
fn union_sets(&mut self, x: usize, y: usize) {
427-
let root_x = self.find_root(x);
428-
let root_y = self.find_root(y);
429-
if root_x == root_y {
430-
// Already in same set
431-
return;
331+
impl UnionFind {
332+
pub(crate) fn new(n: usize) -> Self {
333+
Self {
334+
parent: (0..n).collect(),
335+
rank: vec![0; n],
336+
}
432337
}
433-
match self.rank[root_x].cmp(&self.rank[root_y]) {
434-
Ordering::Less => {
435-
self.parent[root_x] = root_y;
338+
339+
/// Which set does `x` belong to? (identified by the set parent).
340+
///
341+
/// Rings that are touching will have the same root.
342+
/// Once all touching rings have been unioned, rings with different roots are not touching.
343+
pub(crate) fn find_root(&mut self, x: usize) -> usize {
344+
let mut parent = x;
345+
// A root node is its own parent
346+
while parent != self.parent[x] {
347+
parent = self.parent[self.parent[x]];
348+
// compress path to make `find_root` faster next time
349+
self.parent[x] = parent;
436350
}
437-
Ordering::Greater => {
438-
self.parent[root_y] = root_x;
351+
parent
352+
}
353+
354+
/// If two rings are touching, union their two "touchable" sets.
355+
pub(crate) fn union_sets(&mut self, x: usize, y: usize) {
356+
let root_x = self.find_root(x);
357+
let root_y = self.find_root(y);
358+
if root_x == root_y {
359+
// Already in same set
360+
return;
439361
}
440-
Ordering::Equal => {
441-
self.parent[root_y] = root_x;
442-
self.rank[root_x] += 1;
362+
match self.rank[root_x].cmp(&self.rank[root_y]) {
363+
Ordering::Less => {
364+
self.parent[root_x] = root_y;
365+
}
366+
Ordering::Greater => {
367+
self.parent[root_y] = root_x;
368+
}
369+
Ordering::Equal => {
370+
self.parent[root_y] = root_x;
371+
self.rank[root_x] += 1;
372+
}
443373
}
444374
}
445375
}

0 commit comments

Comments
 (0)