WKBReader.java

/*
 * Copyright (c) 2016 Vivid Solutions.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * and Eclipse Distribution License v. 1.0 which accompanies this distribution.
 * The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v20.html
 * and the Eclipse Distribution License is available at
 *
 * http://www.eclipse.org/org/documents/edl-v10.php.
 */
package org.locationtech.jts.io;

import java.io.IOException;
import java.util.EnumSet;

import org.locationtech.jts.geom.CoordinateSequence;
import org.locationtech.jts.geom.CoordinateSequenceFactory;
import org.locationtech.jts.geom.CoordinateSequences;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryCollection;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.LineString;
import org.locationtech.jts.geom.LinearRing;
import org.locationtech.jts.geom.MultiLineString;
import org.locationtech.jts.geom.MultiPoint;
import org.locationtech.jts.geom.MultiPolygon;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.geom.PrecisionModel;

/**
 * Reads a {@link Geometry} from a byte stream in Well-Known Binary format.
 * Supports use of an {@link InStream}, which allows easy use
 * with arbitrary byte stream sources.
 * <p>
 * This class reads the format described in {@link WKBWriter}.
 * It partially handles
 * the <b>Extended WKB</b> format used by PostGIS, 
 * by parsing and storing optional SRID values.
 * If a SRID is not specified in an element geometry, it is inherited
 * from the parent's SRID.
 * The default SRID value is 0.
 * <p>
 * Although not defined in the WKB specification, empty points
 * are handled if they are represented as a Point with <code>NaN</code> X and Y ordinates.
 * <p>
 * The reader repairs structurally-invalid input
 * (specifically, LineStrings and LinearRings which contain
 * too few points have vertices added,
 * and non-closed rings are closed).
 * <p>
 * The reader handles most errors caused by malformed or malicious WKB data.
 * It checks for obviously excessive values of the fields 
 * <code>numElems</code>, <code>numRings</code>, and <code>numCoords</code>.
 * It also checks that the reader does not read beyond the end of the data supplied.
 * A {@link ParseException} is thrown if this situation is detected.
 * <p>
 * This class is designed to support reuse of a single instance to read multiple
 * geometries. This class is not thread-safe; each thread should create its own
 * instance.
 * <p>
 * As of version 1.15, the reader can read geometries following the OGC 06-103r4 
 * Simple Features Access 1.2.1 specification,
 * which aligns with the ISO 19125 standard.
 * This format is used by Spatialite and Geopackage.
 * <p>
 * The difference between the PostGIS EWKB format and the ISO/OGC specification is
 * that Z and M coordinates are detected with a bit mask on the highest byte in
 * the former case (0x80 for Z and 0x40 for M), while the ISO/OGC specification uses
 * specific int ranges for 2D geometries, Z geometries (2D code+1000), M geometries
 * (2D code+2000) and ZM geometries (2D code+3000).
 * <p>
 * Note that the {@link WKBWriter} is not changed and still writes the PostGIS EWKB
 * geometry format.
 * 
 * @see WKBWriter for a formal format specification
 */
public class WKBReader
{
  /**
   * Converts a hexadecimal string to a byte array.
   * The hexadecimal digit symbols are case-insensitive.
   * Each pair of digits produces one byte; the first digit of the pair
   * is the high-order nibble.
   *
   * @param hex a string containing an even number of hex digits
   * @return an array of bytes with the value of the hex string
   * @throws IllegalArgumentException if the string has odd length
   *   or contains a character which is not a hex digit
   */
  public static byte[] hexToBytes(String hex)
  {
    int hexLen = hex.length();
    // an odd number of digits cannot be split into whole bytes,
    // so reject it rather than silently dropping the trailing digit
    if (hexLen % 2 != 0)
      throw new IllegalArgumentException("Hex string has odd length");

    byte[] bytes = new byte[hexLen / 2];
    for (int i = 0; i < bytes.length; i++) {
      int hiNibble = hexToInt(hex.charAt(2 * i));
      int loNibble = hexToInt(hex.charAt(2 * i + 1));
      bytes[i] = (byte) ((hiNibble << 4) | loNibble);
    }
    return bytes;
  }

  /**
   * Converts a single hexadecimal digit to its numeric value.
   *
   * @param hex the character to convert
   * @return the value of the digit, in the range 0 to 15
   * @throws IllegalArgumentException if the character is not a hex digit
   */
  private static int hexToInt(char hex)
  {
    int nib = Character.digit(hex, 16);
    if (nib < 0)
      throw new IllegalArgumentException("Invalid hex digit: '" + hex + "'");
    return nib;
  }

  /** Prefix of the error message reported when a multi-geometry contains an element of the wrong type. */
  private static final String INVALID_GEOM_TYPE_MSG
  = "Invalid geometry type encountered in ";

  /** Name used in error messages for the coordinate count field. */
  private static final String FIELD_NUMCOORDS = "numCoords";

  /** Name used in error messages for the ring count field. */
  private static final String FIELD_NUMRINGS = "numRings";

  /** Name used in error messages for the element count field. */
  private static final String FIELD_NUMELEMS = "numElems";

  /** Factory used to create every geometry which is read. */
  private final GeometryFactory factory;
  /** Factory used to create the coordinate sequences of the geometries read. */
  private final CoordinateSequenceFactory csFactory;
  /** Precision model applied to the X and Y ordinates which are read. */
  private final PrecisionModel precisionModel;
  // default dimension - will be set on read
  private int inputDimension = 2;
  /**
   * true if structurally invalid input should be reported rather than repaired.
   * At some point this could be made client-controllable.
   */
  private boolean isStrict = false;
  /** Byte-order-aware stream wrapper; re-targeted at the input of each read. */
  private final ByteOrderDataInStream dis = new ByteOrderDataInStream();
  /** Scratch buffer holding the ordinates of the coordinate most recently read. */
  private double[] ordValues;

  /** Upper bound accepted for the count fields of the geometry currently being read. */
  private int maxNumFieldValue;

  /**
   * Creates a reader which builds geometries using a default {@link GeometryFactory}.
   */
  public WKBReader() {
    this(new GeometryFactory());
  }

  /**
   * Creates a reader which builds geometries using the given factory.
   * The precision model and the coordinate sequence factory are taken from it.
   *
   * @param geometryFactory the factory used to create the geometries read
   */
  public WKBReader(GeometryFactory geometryFactory) {
    factory = geometryFactory;
    csFactory = geometryFactory.getCoordinateSequenceFactory();
    precisionModel = geometryFactory.getPrecisionModel();
  }

  /**
   * Reads a single {@link Geometry} in WKB format from a byte array.
   * <p>
   * The count fields in the data are limited to one eighth of the array length,
   * so that a corrupt count cannot trigger an oversized allocation.
   *
   * @param bytes the byte array to read from
   * @return the geometry read
   * @throws ParseException if the WKB is ill-formed
   */
  public Geometry read(byte[] bytes) throws ParseException
  {
    // possibly reuse the ByteArrayInStream?
    // don't throw IOExceptions, since we are not doing any I/O
    try {
      return read(new ByteArrayInStream(bytes), bytes.length / 8);
    }
    catch (IOException ex) {
      // keep the original exception as the cause so its stack trace is not lost
      throw new RuntimeException("Unexpected IOException caught: " + ex.getMessage(), ex);
    }
  }

  /**
   * Reads a {@link Geometry} in binary WKB format from an {@link InStream}.
   * Because the length of the stream is not known, the count fields
   * are only required to be non-negative.
   *
   * @param is the stream to read from
   * @return the Geometry read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  public Geometry read(InStream is)
  throws IOException, ParseException
  {
    // the size of an InStream cannot be determined, so use the largest possible limit
    final int unboundedLimit = Integer.MAX_VALUE;
    return read(is, unboundedLimit);
  }

  /**
   * Reads a geometry from a stream, limiting the values accepted in count fields.
   *
   * @param is the stream to read from
   * @param maxCoordNum the largest value allowed in a count field
   * @return the Geometry read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private Geometry read(InStream is, int maxCoordNum)
  throws IOException, ParseException
  {
    // Upper bound on the values accepted in count fields;
    // this avoids OOM exceptions caused by malformed input.
    maxNumFieldValue = maxCoordNum;
    dis.setInStream(is);

    // the outermost geometry starts with the default SRID
    final int defaultSRID = 0;
    return readGeometry(defaultSRID);
  }
  
  /**
   * Reads a count field and checks that it lies within the allowed range.
   *
   * @param fieldName the name of the field, used in the error message
   * @return the value read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the value is negative or exceeds the current limit
   */
  private int readNumField(String fieldName) throws IOException, ParseException {
    // The field is an unsigned int, but Java only has signed ints,
    // so a negative value stands for a number which is too large.
    int value = dis.readInt();
    boolean isInRange = 0 <= value && value <= maxNumFieldValue;
    if (isInRange) {
      return value;
    }
    throw new ParseException(fieldName + " value is too large");
  }
  
  /**
   * Reads one geometry (header and body) from the current position of the stream.
   * The header supplies the byte order, the geometry type, the presence of
   * Z and M ordinates, and optionally a SRID (EWKB only).
   *
   * @param SRID the SRID inherited from the enclosing geometry;
   *   it is replaced by the value in the header if one is present
   * @return the geometry read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the geometry type is unknown,
   *   or if the byte order is unknown and the reader is strict
   */
  private Geometry readGeometry(int SRID)
  throws IOException, ParseException
  {
    // always set the byte order, since it may change from geometry to geometry
    byte byteOrderWKB = dis.readByte();
    if (byteOrderWKB == WKBConstants.wkbNDR) {
      dis.setOrder(ByteOrderValues.LITTLE_ENDIAN);
    }
    else if (byteOrderWKB == WKBConstants.wkbXDR) {
      dis.setOrder(ByteOrderValues.BIG_ENDIAN);
    }
    else if (isStrict) {
      throw new ParseException("Unknown geometry byte order (not NDR or XDR): " + byteOrderWKB);
    }
    // If not strict and the marker is neither XDR nor NDR, the order already set on
    // the stream (at the start of the multi-geometry) stays in use. This allows
    // WKBReader to work with Spatialite native BLOB WKB, as well as other WKB variants
    // which only specify endian-ness at the start of the multi-geometry.

    int typeInt = dis.readInt();

    // Mask out the EWKB flag bits. The low 3 decimal digits of the remaining code
    // are the geometry type, and the thousands digit is the ISO/OGC dimension range.
    // This supports both EWKB and ISO/OGC.
    int isoTypeCode = typeInt & 0xffff;
    int geometryType = isoTypeCode % 1000;
    int isoDimRange = isoTypeCode / 1000;

    // Z is present if the 0x80000000 flag is set (PostGIS EWKB),
    // or the type is in the 1000 range (Z) or the 3000 range (ZM) (ISO/OGC 06-103r4)
    boolean hasZ = (typeInt & 0x80000000) != 0 || isoDimRange == 1 || isoDimRange == 3;
    // M is present if the 0x40000000 flag is set (PostGIS EWKB),
    // or the type is in the 2000 range (M) or the 3000 range (ZM) (ISO/OGC 06-103r4)
    boolean hasM = (typeInt & 0x40000000) != 0 || isoDimRange == 2 || isoDimRange == 3;

    int dimension = 2;
    EnumSet<Ordinate> ordinateFlags = EnumSet.of(Ordinate.X, Ordinate.Y);
    if (hasZ) {
      dimension++;
      ordinateFlags.add(Ordinate.Z);
    }
    if (hasM) {
      dimension++;
      ordinateFlags.add(Ordinate.M);
    }
    inputDimension = dimension;

    // a SRID is only present in EWKB, and overrides the inherited value
    boolean hasSRID = (typeInt & 0x20000000) != 0;
    int geomSRID = hasSRID ? dis.readInt() : SRID;

    // only allocate the ordinate buffer if the current one is missing or too small
    if (ordValues == null || ordValues.length < inputDimension) {
      ordValues = new double[inputDimension];
    }

    Geometry geom;
    switch (geometryType) {
      case WKBConstants.wkbPoint :
        geom = readPoint(ordinateFlags);
        break;
      case WKBConstants.wkbLineString :
        geom = readLineString(ordinateFlags);
        break;
      case WKBConstants.wkbPolygon :
        geom = readPolygon(ordinateFlags);
        break;
      case WKBConstants.wkbMultiPoint :
        geom = readMultiPoint(geomSRID);
        break;
      case WKBConstants.wkbMultiLineString :
        geom = readMultiLineString(geomSRID);
        break;
      case WKBConstants.wkbMultiPolygon :
        geom = readMultiPolygon(geomSRID);
        break;
      case WKBConstants.wkbGeometryCollection :
        geom = readGeometryCollection(geomSRID);
        break;
      default:
        throw new ParseException("Unknown WKB type " + geometryType);
    }
    return setSRID(geom, geomSRID);
  }

  /**
   * Sets the SRID of a geometry, unless the SRID is the default value 0.
   *
   * @param g the geometry to update
   * @param SRID the SRID to assign; 0 leaves the geometry unchanged
   * @return the geometry with an updated SRID value, if required
   */
  private Geometry setSRID(Geometry g, int SRID)
  {
    boolean isDefaultSRID = SRID == 0;
    if (! isDefaultSRID) {
      g.setSRID(SRID);
    }
    return g;
  }

  /**
   * Reads the body of a Point.
   * A point whose X or Y ordinate is NaN is read as an empty point.
   *
   * @param ordinateFlags the ordinates present in the input
   * @return the point read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private Point readPoint(EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    CoordinateSequence seq = readCoordinateSequence(1, ordinateFlags);
    // a NaN X or Y ordinate is the representation of an empty point
    boolean isEmpty = Double.isNaN(seq.getX(0)) || Double.isNaN(seq.getY(0));
    return isEmpty ? factory.createPoint() : factory.createPoint(seq);
  }

  /**
   * Reads the body of a LineString: a coordinate count followed by the coordinates.
   *
   * @param ordinateFlags the ordinates present in the input
   * @return the line string read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private LineString readLineString(EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    int numCoords = readNumField(FIELD_NUMCOORDS);
    return factory.createLineString(
        readCoordinateSequenceLineString(numCoords, ordinateFlags));
  }

  /**
   * Reads a LinearRing: a coordinate count followed by the coordinates.
   *
   * @param ordinateFlags the ordinates present in the input
   * @return the ring read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private LinearRing readLinearRing(EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    int numCoords = readNumField(FIELD_NUMCOORDS);
    return factory.createLinearRing(
        readCoordinateSequenceRing(numCoords, ordinateFlags));
  }

  /**
   * Reads the body of a Polygon: a ring count followed by the rings.
   * The first ring is the shell and any further rings are holes.
   * A ring count of zero produces an empty polygon.
   *
   * @param ordinateFlags the ordinates present in the input
   * @return the polygon read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private Polygon readPolygon(EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    int numRings = readNumField(FIELD_NUMRINGS);

    // empty polygon
    if (numRings <= 0) {
      return factory.createPolygon();
    }

    // the holes array stays null when the polygon consists of a shell only
    int numHoles = numRings - 1;
    LinearRing[] holes = (numHoles > 0) ? new LinearRing[numHoles] : null;

    LinearRing shell = readLinearRing(ordinateFlags);
    for (int holeIndex = 0; holeIndex < numHoles; holeIndex++) {
      holes[holeIndex] = readLinearRing(ordinateFlags);
    }
    return factory.createPolygon(shell, holes);
  }

  /**
   * Reads the body of a MultiPoint: an element count followed by the elements,
   * each of which must be a Point.
   *
   * @param SRID the SRID passed on to the elements
   * @return the multi-point read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed or an element is not a Point
   */
  private MultiPoint readMultiPoint(int SRID) throws IOException, ParseException
  {
    int numElems = readNumField(FIELD_NUMELEMS);
    Point[] points = new Point[numElems];
    for (int k = 0; k < points.length; k++) {
      Geometry elem = readGeometry(SRID);
      if (elem instanceof Point) {
        points[k] = (Point) elem;
      }
      else {
        throw new ParseException(INVALID_GEOM_TYPE_MSG + "MultiPoint");
      }
    }
    return factory.createMultiPoint(points);
  }

  /**
   * Reads the body of a MultiLineString: an element count followed by the elements,
   * each of which must be a LineString.
   *
   * @param SRID the SRID passed on to the elements
   * @return the multi-line string read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed or an element is not a LineString
   */
  private MultiLineString readMultiLineString(int SRID) throws IOException, ParseException
  {
    int numElems = readNumField(FIELD_NUMELEMS);
    LineString[] lines = new LineString[numElems];
    for (int k = 0; k < lines.length; k++) {
      Geometry elem = readGeometry(SRID);
      if (elem instanceof LineString) {
        lines[k] = (LineString) elem;
      }
      else {
        throw new ParseException(INVALID_GEOM_TYPE_MSG + "MultiLineString");
      }
    }
    return factory.createMultiLineString(lines);
  }

  /**
   * Reads the body of a MultiPolygon: an element count followed by the elements,
   * each of which must be a Polygon.
   *
   * @param SRID the SRID passed on to the elements
   * @return the multi-polygon read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed or an element is not a Polygon
   */
  private MultiPolygon readMultiPolygon(int SRID) throws IOException, ParseException
  {
    int numElems = readNumField(FIELD_NUMELEMS);
    Polygon[] polygons = new Polygon[numElems];
    for (int k = 0; k < polygons.length; k++) {
      Geometry elem = readGeometry(SRID);
      if (elem instanceof Polygon) {
        polygons[k] = (Polygon) elem;
      }
      else {
        throw new ParseException(INVALID_GEOM_TYPE_MSG + "MultiPolygon");
      }
    }
    return factory.createMultiPolygon(polygons);
  }

  /**
   * Reads the body of a GeometryCollection: an element count followed by the elements,
   * which may be geometries of any type.
   *
   * @param SRID the SRID passed on to the elements
   * @return the geometry collection read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private GeometryCollection readGeometryCollection(int SRID) throws IOException, ParseException
  {
    int numElems = readNumField(FIELD_NUMELEMS);
    Geometry[] elements = new Geometry[numElems];
    int next = 0;
    while (next < numElems) {
      elements[next] = readGeometry(SRID);
      next++;
    }
    return factory.createGeometryCollection(elements);
  }

  /**
   * Reads a given number of coordinates into a new coordinate sequence.
   * If the sequence created by the factory has a smaller dimension than the input,
   * the surplus ordinates of each coordinate are read but not stored.
   *
   * @param size the number of coordinates to read
   * @param ordinateFlags the ordinates present in the input
   * @return the coordinate sequence read
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private CoordinateSequence readCoordinateSequence(int size, EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    int numMeasures = ordinateFlags.contains(Ordinate.M) ? 1 : 0;
    CoordinateSequence coords = csFactory.create(size, inputDimension, numMeasures);

    // never copy more ordinates than either the input or the sequence can hold
    int copyDim = Math.min(coords.getDimension(), inputDimension);

    for (int ptIndex = 0; ptIndex < size; ptIndex++) {
      readCoordinate();
      for (int ordIndex = 0; ordIndex < copyDim; ordIndex++) {
        coords.setOrdinate(ptIndex, ordIndex, ordValues[ordIndex]);
      }
    }
    return coords;
  }

  /**
   * Reads the coordinate sequence of a LineString.
   * Unless the reader is strict, a sequence which is non-empty but has
   * fewer than 2 coordinates is extended to 2 coordinates.
   *
   * @param size the number of coordinates to read
   * @param ordinateFlags the ordinates present in the input
   * @return the coordinate sequence read, extended if required
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private CoordinateSequence readCoordinateSequenceLineString(int size, EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    CoordinateSequence coords = readCoordinateSequence(size, ordinateFlags);
    int numRead = coords.size();
    boolean isTooShort = numRead != 0 && numRead < 2;
    if (! isStrict && isTooShort) {
      return CoordinateSequences.extend(csFactory, coords, 2);
    }
    return coords;
  }
  
  /**
   * Reads the coordinate sequence of a LinearRing.
   * Unless the reader is strict, a sequence which is not a ring
   * is passed to {@link CoordinateSequences#ensureValidRing} for repair.
   *
   * @param size the number of coordinates to read
   * @param ordinateFlags the ordinates present in the input
   * @return the coordinate sequence read, repaired if required
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the WKB is ill-formed
   */
  private CoordinateSequence readCoordinateSequenceRing(int size, EnumSet<Ordinate> ordinateFlags) throws IOException, ParseException
  {
    CoordinateSequence coords = readCoordinateSequence(size, ordinateFlags);
    if (isStrict || CoordinateSequences.isRing(coords)) {
      return coords;
    }
    return CoordinateSequences.ensureValidRing(csFactory, coords);
  }

  /**
   * Reads one coordinate of the current input dimension into the ordinate buffer.
   * The X and Y ordinates are made precise according to the precision model
   * in use; any further ordinates are stored exactly as read.
   *
   * @throws IOException if the underlying stream creates an error
   * @throws ParseException if the data stream reports ill-formed input
   */
  private void readCoordinate() throws IOException, ParseException
  {
    for (int ordIndex = 0; ordIndex < inputDimension; ordIndex++) {
      double rawValue = dis.readDouble();
      // only X (index 0) and Y (index 1) are subject to the precision model
      boolean isXorY = ordIndex < 2;
      ordValues[ordIndex] = isXorY ? precisionModel.makePrecise(rawValue) : rawValue;
    }
  }

}