GeographicDataParser.java
/**
* Copyright (c) 2020, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* SPDX-License-Identifier: MPL-2.0
*/
package com.powsybl.iidm.geodata.odre;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.powsybl.iidm.geodata.elements.GeoShape;
import com.powsybl.iidm.geodata.utils.DistanceCalculator;
import com.powsybl.iidm.geodata.utils.GeoShapeDeserializer;
import com.powsybl.iidm.geodata.utils.LineGraph;
import com.powsybl.iidm.network.extensions.Coordinate;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.time.StopWatch;
import org.jgrapht.Graph;
import org.jgrapht.event.ConnectedComponentTraversalEvent;
import org.jgrapht.event.TraversalListenerAdapter;
import org.jgrapht.event.VertexTraversalEvent;
import org.jgrapht.traverse.BreadthFirstIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.stream.Stream;
import static java.util.Collections.min;
import static java.util.Collections.reverse;
/**
* @author Chamseddine Benhamed {@literal <chamseddine.benhamed at rte-france.com>}
*/
public final class GeographicDataParser {
private GeographicDataParser() {
}
private static final Logger LOGGER = LoggerFactory.getLogger(GeographicDataParser.class);
private static final int THRESHOLD = 5;
/**
* Parse the substations CSV data contained in the given reader, using the given odreConfig for column names.
* @return the map of substation coordinates indexed by the substation id
*/
public static Map<String, Coordinate> parseSubstations(Reader reader, OdreConfig odreConfig) {
Map<String, Coordinate> substations = new LinkedHashMap<>();
StopWatch stopWatch = new StopWatch();
stopWatch.start();
try {
CSVParser records = CSVParser.parse(reader, FileValidator.CSV_FORMAT);
if (FileValidator.validateSubstationsHeaders(records, odreConfig)) {
for (CSVRecord row : records) {
String id = row.get(odreConfig.substationIdColumn());
substations.computeIfAbsent(id, key -> {
double lon = Double.parseDouble(row.get(odreConfig.substationLongitudeColumn()));
double lat = Double.parseDouble(row.get(odreConfig.substationLatitudeColumn()));
return new Coordinate(lat, lon);
});
}
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
LOGGER.info("{} substations read in {} ms", substations.size(), stopWatch.getTime());
return substations;
}
/**
* Parse the lines CSV data contained in the given readers, using the given odreConfig for column names and line types.
* @param aerialLinesReader the reader containing the aerial lines CSV data
* @param undergroundLinesReader the reader containing the underground lines CSV data
* @param odreConfig the config used for column names and line types
* @return the map of line coordinates indexed by the line id
*/
public static Map<String, List<Coordinate>> parseLines(Reader aerialLinesReader, Reader undergroundLinesReader, OdreConfig odreConfig) {
try {
StopWatch stopWatch = new StopWatch();
stopWatch.start();
Map<String, List<List<Coordinate>>> coordinatesListsByLine = new HashMap<>();
CSVParser aerialLinesRecords = CSVParser.parse(aerialLinesReader, FileValidator.CSV_FORMAT);
CSVParser undergroundLinesRecords = CSVParser.parse(undergroundLinesReader, FileValidator.CSV_FORMAT);
if (!FileValidator.validateAerialLinesHeaders(aerialLinesRecords, odreConfig)
|| !FileValidator.validateUndergroundHeaders(undergroundLinesRecords, odreConfig)) {
return Collections.emptyMap();
}
parseLine(coordinatesListsByLine, aerialLinesRecords, odreConfig);
parseLine(coordinatesListsByLine, undergroundLinesRecords, odreConfig);
Map<String, List<Coordinate>> lines = fixLines(coordinatesListsByLine);
LOGGER.info("{} lines read in {} ms", lines.size(), stopWatch.getTime());
if (coordinatesListsByLine.size() != lines.size()) {
LOGGER.warn("Total discarded lines : {}/{} ",
coordinatesListsByLine.size() - lines.size(), coordinatesListsByLine.size());
}
return lines;
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
* "Fixing" the lines coordinates data: this method tries to calculate a single list when there are several lists for
* a given line, which often occurs in the ODRE data.
* @param coordinatesListsByLine the map of all the lists of coordinates, indexed by the line id
* @return the map of the "fixed" line coordinates, indexed by the line id
*/
private static Map<String, List<Coordinate>> fixLines(Map<String, List<List<Coordinate>>> coordinatesListsByLine) {
Map<String, List<Coordinate>> lines = new HashMap<>();
int linesWithOneConnectedSet = 0;
int linesWithTwoOrMoreConnectedSets = 0;
int oneConnectedSetDiscarded = 0;
int twoOrMoreConnectedSetsDiscarded = 0;
for (Map.Entry<String, List<List<Coordinate>>> e : coordinatesListsByLine.entrySet()) {
String lineId = e.getKey();
List<List<Coordinate>> coordinatesLists = e.getValue();
if (coordinatesLists.size() == 1) {
// Easy case: only one list
lines.put(lineId, coordinatesLists.get(0));
} else {
// We want to calculate a single list: we construct a graph based on the coordinates, adding a vertex
// for each coordinate, and adding an edge between two consecutive coordinates
LineGraph<Coordinate, Object> graph = new LineGraph<>(Object.class);
coordinatesLists.forEach(graph::addVerticesAndEdges);
List<ConnectedSet> connectedSets = getConnectedSets(graph);
if (connectedSets.size() == 1) {
linesWithOneConnectedSet++;
ConnectedSet connectedSet = connectedSets.get(0);
if (connectedSet.ends().size() == 2) {
// Only one connected set and two ends: this is a single line
lines.put(lineId, connectedSet.list());
} else {
// Only one connected set and more than two ends: there are forks in the lines
oneConnectedSetDiscarded++;
}
} else {
// there are several connected sets: we try to order them to have a single line
linesWithTwoOrMoreConnectedSets++;
List<List<Coordinate>> coordinatesComponents = createMultipleConnectedSetsCoordinatesList(connectedSets);
if (coordinatesComponents.size() != connectedSets.size() || coordinatesComponents.size() > 2) {
// This happens if there is a fork in one of the connected components or if there are more than 2 connected components
twoOrMoreConnectedSetsDiscarded++;
continue;
}
lines.put(lineId, aggregateCoordinates(coordinatesComponents));
}
}
}
LOGGER.info("{} lines have one Connected set, {} of them were discarded", linesWithOneConnectedSet, oneConnectedSetDiscarded);
LOGGER.info("{} lines have two or more Connected sets, {} of them were discarded", linesWithTwoOrMoreConnectedSets, twoOrMoreConnectedSetsDiscarded);
return lines;
}
/**
* Compute the connected sets for the given graph. The corresponding list of coordinates and list of extremities
* for each connected component is computed at the same time.
*/
private static List<ConnectedSet> getConnectedSets(Graph<Coordinate, Object> graph) {
List<ConnectedSet> connectedSets = new ArrayList<>();
Set<Coordinate> vertexSet = graph.vertexSet();
Optional<Coordinate> endCoord = vertexSet.stream().filter(v -> graph.degreeOf(v) == 1).findFirst();
if (endCoord.isPresent()) {
var bfi = new BreadthFirstIterator<Coordinate, Object>(graph, () -> Stream.concat(Stream.of(endCoord.get()), vertexSet.stream()).iterator());
bfi.addTraversalListener(new GeoTraversalListener(graph, connectedSets));
while (bfi.hasNext()) {
bfi.next();
}
} else {
connectedSets = List.of(new ConnectedSet(vertexSet.stream().toList(), List.of()));
}
return connectedSets;
}
/**
* Parsing the CSV lines data which is given by the CSVParser, using the given odreConfig for column names and line types.
* The resulting coordinates are put in the given map.
*/
private static void parseLine(Map<String, List<List<Coordinate>>> coordinateListsByLine, CSVParser csvParser, OdreConfig odreConfig) throws JsonProcessingException {
Map<String, String> idsColumnNames = odreConfig.idsColumnNames();
for (CSVRecord row : csvParser) {
List<String> ids = Stream.of(row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_1)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_2)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_3)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_4)),
row.get(idsColumnNames.get(OdreConfig.LINE_ID_KEY_5))).filter(Objects::nonNull).filter(s -> !s.isEmpty()).distinct().toList();
GeoShape geoShape = GeoShapeDeserializer.read(row.get(odreConfig.geoShapeColumn()));
if (ids.isEmpty() || geoShape.coordinates().isEmpty()) {
continue;
}
for (String lineId : ids) {
coordinateListsByLine.computeIfAbsent(lineId, key -> new ArrayList<>())
.add(geoShape.coordinates());
}
}
}
/**
* Calculate the distance between the first and the last coordinates of the given list
*/
private static double getBranchLength(List<Coordinate> coordinatesComponent) {
return DistanceCalculator.distance(coordinatesComponent.get(0), coordinatesComponent.get(coordinatesComponent.size() - 1));
}
/**
* Aggregate coordinates of two connected components into one single list by trying to find which extremity connects to which other extremity
*/
private static List<Coordinate> aggregateCoordinates(List<List<Coordinate>> coordinatesComponents) {
List<Coordinate> aggregatedCoordinates;
List<Coordinate> coordinatesComponent1 = coordinatesComponents.get(0);
List<Coordinate> coordinatesComponent2 = coordinatesComponents.get(1);
double l1 = getBranchLength(coordinatesComponent1);
double l2 = getBranchLength(coordinatesComponent2);
if (100 * Math.min(l1, l2) / Math.max(l1, l2) < THRESHOLD) {
return l1 > l2 ? coordinatesComponent1 : coordinatesComponent2;
}
double d1 = DistanceCalculator.distance(coordinatesComponent1.get(0), coordinatesComponent2.get(coordinatesComponent2.size() - 1));
double d2 = DistanceCalculator.distance(coordinatesComponent1.get(0), coordinatesComponent2.get(0));
double d3 = DistanceCalculator.distance(coordinatesComponent1.get(coordinatesComponent1.size() - 1), coordinatesComponent2.get(coordinatesComponent2.size() - 1));
double d4 = DistanceCalculator.distance(coordinatesComponent1.get(coordinatesComponent1.size() - 1), coordinatesComponent2.get(0));
List<Double> distances = Arrays.asList(d1, d2, d3, d4);
double min = min(distances);
if (d1 == min) {
aggregatedCoordinates = new ArrayList<>(coordinatesComponent2);
aggregatedCoordinates.addAll(coordinatesComponent1);
} else if (d2 == min) {
reverse(coordinatesComponent1);
aggregatedCoordinates = new ArrayList<>(coordinatesComponent1);
aggregatedCoordinates.addAll(coordinatesComponent2);
} else if (d3 == min) {
reverse(coordinatesComponent2);
aggregatedCoordinates = new ArrayList<>(coordinatesComponent1);
aggregatedCoordinates.addAll(coordinatesComponent2);
} else {
aggregatedCoordinates = new ArrayList<>(coordinatesComponent1);
aggregatedCoordinates.addAll(coordinatesComponent2);
}
return aggregatedCoordinates;
}
/**
* Constructing the list of coordinates of each connected component, filtering out the connected sets which have more than two ends (or zero)
*/
private static List<List<Coordinate>> createMultipleConnectedSetsCoordinatesList(List<ConnectedSet> connectedSets) {
List<List<Coordinate>> coordinatesComponents = new ArrayList<>();
for (ConnectedSet connectedSet : connectedSets) {
List<Coordinate> endsComponent = connectedSet.ends();
if (endsComponent.size() == 2) {
coordinatesComponents.add(connectedSet.list());
} else {
break;
}
}
return coordinatesComponents;
}
private record ConnectedSet(List<Coordinate> list, List<Coordinate> ends) {
}
private static class GeoTraversalListener extends TraversalListenerAdapter<Coordinate, Object> {
private final List<ConnectedSet> connectedSets;
private final Graph<Coordinate, Object> graph;
private List<Coordinate> currentConnectedSet;
private List<Coordinate> currentConnectedSetEnds;
public GeoTraversalListener(Graph<Coordinate, Object> graph, List<ConnectedSet> connectedSets) {
this.graph = graph;
this.connectedSets = connectedSets;
}
@Override
public void connectedComponentFinished(ConnectedComponentTraversalEvent e) {
connectedSets.add(new ConnectedSet(currentConnectedSet, currentConnectedSetEnds));
}
@Override
public void connectedComponentStarted(ConnectedComponentTraversalEvent e) {
currentConnectedSet = new ArrayList<>();
currentConnectedSetEnds = new ArrayList<>();
}
@Override
public void vertexTraversed(VertexTraversalEvent<Coordinate> e) {
Coordinate v = e.getVertex();
currentConnectedSet.add(v);
if (graph.degreeOf(v) == 1) {
currentConnectedSetEnds.add(v);
}
}
}
}