CoverageCleaner.java
/*
* Copyright (c) 2025 Martin Davis.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* and Eclipse Distribution License v. 1.0 which accompanies this distribution.
* The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v20.html
* and the Eclipse Distribution License is available at
*
* http://www.eclipse.org/org/documents/edl-v10.php.
*/
package org.locationtech.jts.coverage;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.stream.Collectors;
import org.locationtech.jts.algorithm.construct.MaximumInscribedCircle;
import org.locationtech.jts.algorithm.locate.SimplePointInAreaLocator;
import org.locationtech.jts.dissolve.LineDissolver;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.MultiPolygon;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.index.strtree.STRtree;
import org.locationtech.jts.noding.NodedSegmentString;
import org.locationtech.jts.noding.Noder;
import org.locationtech.jts.noding.SegmentStringUtil;
import org.locationtech.jts.noding.snap.SnappingNoder;
import org.locationtech.jts.operation.polygonize.Polygonizer;
import org.locationtech.jts.operation.relateng.RelateNG;
import org.locationtech.jts.operation.relateng.RelatePredicate;
import org.locationtech.jts.util.IntArrayList;
import org.locationtech.jts.util.Stopwatch;
/**
* Cleans the linework of a set of polygonal geometries to form a valid polygonal coverage.
* The input is an array of valid {@link Polygon} or {@link MultiPolygon} geometries
* which may contain topological errors such as overlaps and gaps.
* Empty or non-polygonal inputs are removed.
* Linework is snapped together to eliminate small discrepancies.
* Overlaps are merged with an adjacent polygon, according to a given merge strategy.
* Gaps narrower than a given width are filled and merged with an adjacent polygon.
* The output is an array of polygonal geometries forming a valid polygonal coverage.
*
* <h3>Snapping</h3>
*
* Snapping to nearby vertices and line segment snapping is used to improve noding robustness
* and eliminate small errors in an efficient way,
* By default this uses a very small snapping distance based on the extent of the input data.
* The snapping distance may be specified explicitly.
* This can reduce the number of overlaps and gaps that need to be merged,
* and reduce the risk of spikes formed by merging gaps.
* However, a large snapping distance may introduce undesirable data alteration.
*
* <h3>Overlap Merging</h3>
*
* Overlaps are merged with an adjacent polygon chosen according to a specified merge strategy.
* The supported strategies are:
* <ul>
* <li><b>Longest Border</b>: (default) merge with the polygon with longest shared border
* ({@link #MERGE_LONGEST_BORDER}.)
* <li><b>Maximum/Minimum Area</b>: merge with the polygon with largest or smallest area
* ({@link #MERGE_MAX_AREA}, {@link #MERGE_MIN_AREA}.)
* <li><b>Minimum Index</b>: merge with the polygon with the lowest index in the input array
* ({@link #MERGE_MIN_INDEX}.)
* This allows sorting the input according to some criteria to provide a priority
* for merging gaps.
* </ul>
*
* <h3>Gap Merging</h3>
*
* Gaps which are wider than a given distance are merged with an adjacent polygon.
* Polygon width is determined as twice the radius of the {@link MaximumInscribedCircle}
* of the gap polygon.
* Gaps are merged with the adjacent polygon with longest shared border.
* Empty holes in input polygons are treated as gaps, and may be filled in.
* Gaps which are not fully enclosed ("inlets") are not removed.
* <p>
* Cleaning can be run on a valid coverage to remove gaps.
*
* <p>
* The clean result is an array of polygonal geometries
* which match one-to-one with the input array.
* A result item may be <tt>null</tt> if:
* <ul>
* <li>the input item is non-polygonal or empty
* <li>the input item is so small it is snapped to collapse
* <li>the input item is covered by another input item
* (which may be a larger or a duplicate (nearly or exactly) geometry)
* </ul>
* The result is a valid coverage according to {@link CoverageValidator#isValid(Geometry[])};
*
* <h3>Known Issues</h3>
* <ul>
* <li>Long narrow gaps adjacent to multiple polygons may form spikes when merged with a single polygon.
* </ul>
*
* <h3>Future Enhancements</h3>
* <ul>
* <li>Provide an area-based tolerance for gap merging
* <li>Prevent long narrow gaps from forming spikes by partitioning them before merging.
* <li>Allow merging narrow parts of a gap while leaving wider portions.
* <li>Support a priority value for each input polygon to control overlap and gap merging
* (this could also allow blocking polygons from being merge targets)
* </ul>
*
* @see CoverageValidator
* @author Martin Davis
*
*/
public class CoverageCleaner {
/**
* Merge strategy that chooses polygon with longest common border
*/
public static final int MERGE_LONGEST_BORDER = 0;
/**
* Merge strategy that chooses polygon with maximum area
*/
public static final int MERGE_MAX_AREA = 1;
/**
* Merge strategy that chooses polygon with minimum area
*/
public static final int MERGE_MIN_AREA = 2;
/**
* Merge strategy that chooses polygon with smallest input index
*/
public static final int MERGE_MIN_INDEX = 3;
/**
* Cleans a set of polygonal geometries to form a valid coverage,
* allowing all cleaning parameters to be specified.
*
* @param coverage an array of polygonal geometries to clean
* @param snappingDistance the distance tolerance for snapping
* @param overlapMergeStrategy the strategy to use for merging overlaps
* @param maxGapWidth the maximum width of gaps to merge
* @return the clean coverage
*/
public static Geometry[] clean(Geometry[] coverage, double snappingDistance,
int overlapMergeStrategy, double maxGapWidth) {
CoverageCleaner cc = new CoverageCleaner(coverage);
cc.setSnappingDistance(snappingDistance);
cc.setGapMaximumWidth(maxGapWidth);
cc.setOverlapMergeStrategy(overlapMergeStrategy);
cc.clean();
return cc.getResult();
}
/**
* Cleans a set of polygonal geometries to form a valid coverage,
* using the default overlap merge strategy {@link #MERGE_LONGEST_BORDER}.
*
* @param coverage an array of polygonal geometries to clean
* @param snappingDistance the distance tolerance for snapping
* @param maxGapWidth the maximum width of gaps to merge
* @return the clean coverage
*/
public static Geometry[] clean(Geometry[] coverage, double snappingDistance,
double maxGapWidth) {
CoverageCleaner cc = new CoverageCleaner(coverage);
cc.setSnappingDistance(snappingDistance);
cc.setGapMaximumWidth(maxGapWidth);
cc.clean();
return cc.getResult();
}
/**
* Cleans a set of polygonal geometries to form a valid coverage,
* using the default snapping distance tolerance.
*
* @param coverage an array of polygonal geometries to clean
* @param overlapMergeStrategy the strategy to use for merging overlaps
* @param maxGapWidth the maximum width of gaps to merge
* @return the clean coverage
*/
public static Geometry[] cleanOverlapGap(Geometry[] coverage,
int overlapMergeStrategy, double maxGapWidth) {
return clean(coverage, -1, overlapMergeStrategy, maxGapWidth);
}
/**
* Cleans a set of polygonal geometries to form a valid coverage,
* with default snapping tolerance and overlap merging,
* and merging gaps which are narrower than a specified width.
*
* @param coverage an array of polygonal geometries to clean
* @param maxGapWidth the maximum width of gaps to merge
* @return the clean coverage
*/
public static Geometry[] cleanGapWidth(Geometry[] coverage, double maxGapWidth) {
return clean(coverage, -1, maxGapWidth);
}
private Geometry[] coverage;
private double snappingDistance; // set to compute default
private double gapMaximumWidth = 0.0;
private int overlapMergeStrategy = MERGE_LONGEST_BORDER;
private GeometryFactory geomFactory;
private STRtree covIndex;
private Polygon[] resultants = null;
private CleanCoverage cleanCov;
private HashMap<Integer, IntArrayList> overlapParentMap = new HashMap<Integer, IntArrayList>();
private List<Polygon> overlaps = new ArrayList<Polygon>();
private List<Polygon> gaps = new ArrayList<Polygon>();
private List<Polygon> mergableGaps;
private static final double DEFAULT_SNAPPING_FACTOR = 1.0e8;
/**
* Create a new cleaner instance for a set of polygonal geometries.
*
* @param coverage an array of polygonal geometries to clean
*/
public CoverageCleaner(Geometry[] coverage) {
this.coverage = coverage;
this.geomFactory = coverage[0].getFactory();
snappingDistance = computeDefaultSnappingDistance(coverage);
}
/**
* Sets the snapping distance tolerance.
* The default is to use a small fraction of the input extent diameter.
* A distance of zero prevents snapping from being used.
*
* @param snappingDistance the snapping distance tolerance
*/
public void setSnappingDistance(double snappingDistance) {
//-- use default distance if invalid argument
if (snappingDistance < 0)
return;
this.snappingDistance = snappingDistance;
}
/**
* Sets the overlap merge strategy to use.
* The default is {@link #MERGE_LONGEST_BORDER}.
*
* @param mergeStrategy the merge strategy code
*/
public void setOverlapMergeStrategy(int mergeStrategy) {
if (mergeStrategy < MERGE_LONGEST_BORDER
|| mergeStrategy > MERGE_MIN_INDEX)
throw new IllegalArgumentException("Invalid merge strategy code: " + mergeStrategy);
this.overlapMergeStrategy = mergeStrategy;
}
/**
* Sets the maximum width of the gaps that will be filled and merged.
* The width of a gap is twice the radius of the Maximum Inscribed Circle in the gap polygon,
* A width of zero prevents gaps from being merged.
*
* @param maxWidth the maximum gap width to merge
*/
public void setGapMaximumWidth(double maxWidth) {
if (maxWidth < 0)
return;
this.gapMaximumWidth = maxWidth;
}
//TODO: support snap-rounding noder for precision reduction
//TODO: add merge gaps by: area?
/**
* Cleans the coverage.
*
*/
public void clean() {
computeResultants(snappingDistance);
//System.out.format("Overlaps: %d Gaps: %d\n", overlaps.size(), mergableGaps.size());
//Stopwatch sw = new Stopwatch();
mergeOverlaps(overlapParentMap);
//System.out.println("Merge Overlaps: " + sw.getTimeString());
//sw.reset();
cleanCov.mergeGaps(mergableGaps);
//System.out.println("Merge Gaps: " + sw.getTimeString());
}
/**
* Gets the cleaned coverage.
*
* @return the clean coverage
*/
public Geometry[] getResult() {
return cleanCov.toCoverage(geomFactory);
}
/**
* Gets polygons representing the overlaps in the input,
* which have been merged.
*
* @return a list of overlap polygons
*/
public List<Polygon> getOverlaps() {
return overlaps;
}
/**
* Gets polygons representing the gaps in the input
* which have been merged.
*
* @return a list of gap polygons
*/
public List<Polygon> getMergedGaps() {
return mergableGaps;
}
//-------------------------------------------------
private static double computeDefaultSnappingDistance(Geometry[] geoms) {
double diameter = extent(geoms).getDiameter();
return diameter / DEFAULT_SNAPPING_FACTOR;
}
private static Envelope extent(Geometry[] geoms) {
Envelope env = new Envelope();
for (Geometry geom : geoms) {
env.expandToInclude(geom.getEnvelopeInternal());
}
return env;
}
private void mergeOverlaps(HashMap<Integer, IntArrayList> overlapParentMap) {
for (int resIndex : overlapParentMap.keySet()) {
cleanCov.mergeOverlap(resultants[resIndex], mergeStrategy(overlapMergeStrategy), overlapParentMap.get(resIndex));
}
}
private CleanCoverage.MergeStrategy mergeStrategy(int mergeStrategyId) {
switch (mergeStrategyId) {
case MERGE_LONGEST_BORDER: return new CleanCoverage.MergeStrategy.BorderMergeStrategy();
case MERGE_MAX_AREA: return new CleanCoverage.MergeStrategy.AreaMergeStrategy(true);
case MERGE_MIN_AREA: return new CleanCoverage.MergeStrategy.AreaMergeStrategy(false);
case MERGE_MIN_INDEX: return new CleanCoverage.MergeStrategy.IndexMergeStrategy(false);
}
throw new IllegalArgumentException("Unknown merge strategy: " + mergeStrategyId);
}
private void computeResultants(double tolerance) {
//System.out.println("Coverage Cleaner ===> polygons: " + coverage.length);
//System.out.format("Snapping distance: %f\n", snappingDistance);
//Stopwatch sw = new Stopwatch();
//sw.start();
Geometry nodedEdges = node(coverage, tolerance);
//System.out.println("Noding: " + sw.getTimeString());
//sw.reset();
Geometry cleanEdges = LineDissolver.dissolve(nodedEdges);
//System.out.println("Dissolve: " + sw.getTimeString());
//sw.reset();
resultants = polygonize(cleanEdges);
//System.out.println("Polygonize: " + sw.getTimeString());
cleanCov = new CleanCoverage(coverage.length);
//sw.reset();
createCoverageIndex();
classifyResult(resultants);
//System.out.println("Classify: " + sw.getTimeString());
mergableGaps = findMergableGaps(gaps);
}
private void createCoverageIndex() {
covIndex = new STRtree();
for (int i = 0; i < coverage.length; i++) {
covIndex.insert(coverage[i].getEnvelopeInternal(), i);
}
}
private void classifyResult(Polygon[] resultants) {
for (int i = 0; i < resultants.length; i++) {
Polygon res = resultants[i];
classifyResultant(i, res);
}
}
private void classifyResultant(int resultIndex, Polygon resPoly) {
Point intPt = resPoly.getInteriorPoint();
int parentIndex = -1;
IntArrayList overlapIndexes = null;
@SuppressWarnings("unchecked")
List<Integer> candidateParentIndex = covIndex.query(intPt.getEnvelopeInternal());
for (int i : candidateParentIndex) {
Geometry parent = coverage[i];
if (covers(parent, intPt)) {
//-- found first parent
if (parentIndex < 0) {
parentIndex = i;
}
else {
//-- more than one parent - record them all
if (overlapIndexes == null) {
overlapIndexes = new IntArrayList();
}
overlapIndexes.add(parentIndex);
overlapIndexes.add(i);
}
}
}
/**
* Classify resultant based on # of parents:
* 0 - gap
* 1 - single polygon face
* >1 - overlap
*/
if (parentIndex < 0) {
gaps.add(resPoly);
}
else if (overlapIndexes != null) {
overlapParentMap.put(resultIndex, overlapIndexes);
overlaps.add(resPoly);
}
else {
cleanCov.add(parentIndex, resPoly);
}
}
private static boolean covers(Geometry poly, Point intPt) {
return SimplePointInAreaLocator.isContained(intPt.getCoordinate(), poly);
//return RelateNG.relate(poly, intPt, RelatePredicate.covers());
}
private List<Polygon> findMergableGaps(List<Polygon> gaps2) {
return gaps.stream().filter(gap -> isMergableGap(gap))
.collect(Collectors.toList());
}
private boolean isMergableGap(Polygon gap) {
if (gapMaximumWidth <= 0) {
return false;
}
return MaximumInscribedCircle.isRadiusWithin(gap, gapMaximumWidth / 2.0);
}
private static Polygon[] polygonize(Geometry cleanEdges) {
Polygonizer polygonizer = new Polygonizer();
polygonizer.add(cleanEdges);
Geometry polys = polygonizer.getGeometry();
return toPolygonArray(polys);
}
public static Geometry node(Geometry[] coverage,
double snapDistance)
{
List<NodedSegmentString> segs = new ArrayList<NodedSegmentString>();
for (Geometry geom : coverage) {
//-- skip non-polygonal and empty elements
if (! isPolygonal(geom))
continue;
if (geom.isEmpty())
continue;
extractNodedSegmentStrings(geom, segs);
}
Noder noder = new SnappingNoder(snapDistance);
noder.computeNodes(segs);
Collection nodedSegStrings = noder.getNodedSubstrings();
return SegmentStringUtil.toGeometry(nodedSegStrings, coverage[0].getFactory());
}
private static boolean isPolygonal(Geometry geom) {
return geom instanceof Polygon || geom instanceof MultiPolygon;
}
private static void extractNodedSegmentStrings(Geometry geom, List<NodedSegmentString> segs) {
//TODO: do more efficiently by adding directly to list
@SuppressWarnings("unchecked")
List<NodedSegmentString> segsGeom = SegmentStringUtil.extractNodedSegmentStrings(geom);
segs.addAll(segsGeom);
}
private static Polygon[] toPolygonArray(Geometry geom) {
Polygon[] geoms = new Polygon[geom.getNumGeometries()];
for (int i = 0; i < geom.getNumGeometries(); i++) {
geoms[i]= (Polygon) geom.getGeometryN(i);
}
return geoms;
}
}