JsonCanonicalizer.java

/*
 * Copyright 2022 The Sigstore Authors.
 * Copyright 2006-2018 WebPKI.org (http://webpki.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package dev.sigstore.json.canonicalizer;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.TreeMap;
import java.util.Vector;
import java.util.regex.Pattern;

/**
 * Canonical JSON serializer.
 *
 * <p>An instance parses its input with {@code JsonDecoder} at construction time and immediately
 * renders the decoded tree into an internal buffer. Object members are written in the iteration
 * order of the decoded {@link TreeMap}, arrays keep their element order, and no whitespace is
 * emitted between tokens. The rendered text is then available as a string or as UTF-8 bytes.
 *
 * @author Anders Rundgren
 */
public class JsonCanonicalizer {

  /** Accumulates the canonical output while the decoded tree is being walked. */
  StringBuilder buffer;

  /**
   * Parses {@code jsonData} and renders its canonical form.
   *
   * @param jsonData the JSON text to canonicalize
   * @throws IOException if the decoder rejects the input
   */
  public JsonCanonicalizer(String jsonData) throws IOException {
    buffer = new StringBuilder();
    Object decodedRoot = new JsonDecoder(jsonData).root;
    serialize(decodedRoot);
  }

  /**
   * Decodes {@code jsonData} as UTF-8 and delegates to the string constructor.
   *
   * @param jsonData UTF-8 encoded JSON text
   * @throws IOException if the decoder rejects the input
   */
  public JsonCanonicalizer(byte[] jsonData) throws IOException {
    this(new String(jsonData, StandardCharsets.UTF_8));
  }

  /** Writes a backslash followed by {@code c}. */
  private void escape(char c) {
    buffer.append('\\');
    buffer.append(c);
  }

  /**
   * Writes {@code value} as a double-quoted JSON string.
   *
   * <p>Quote and backslash are backslash-escaped; backspace, form feed, carriage return and tab
   * use their two-character escapes; every other character below 0x20 except line feed becomes a
   * lowercase four-digit {@code \\u} escape. Everything else is copied verbatim.
   */
  private void serializeString(String value) {
    buffer.append('"');
    final int length = value.length();
    for (int pos = 0; pos < length; pos++) {
      final char ch = value.charAt(pos);
      if (ch == '\n') {
        // This branch is the only place where this fork differs from the original code: the
        // newline character is passed through as-is rather than being escaped. See package-info
        // for more information.
        buffer.append(ch);
      } else if (ch == '"' || ch == '\\') {
        escape(ch);
      } else if (ch == '\b') {
        escape('b');
      } else if (ch == '\f') {
        escape('f');
      } else if (ch == '\r') {
        escape('r');
      } else if (ch == '\t') {
        escape('t');
      } else if (ch < 0x20) {
        escape('u');
        // Emit the four nibbles of the 16-bit code unit, most significant first.
        for (int shift = 12; shift >= 0; shift -= 4) {
          final int nibble = (ch >>> shift) & 0xF;
          final int digit = nibble > 9 ? nibble + 'a' - 10 : nibble + '0';
          buffer.append((char) digit);
        }
      } else {
        buffer.append(ch);
      }
    }
    buffer.append('"');
  }

  /**
   * Appends the canonical rendering of one decoded JSON value to the buffer.
   *
   * @param o a decoded value: {@code null}, {@link String}, {@link Boolean}, {@link Double},
   *     {@link TreeMap} (object) or {@link Vector} (array)
   * @throws IOException declared for the recursive calls; nothing visible here throws it directly
   * @throws InternalError if {@code o} is of any other type
   */
  @SuppressWarnings("unchecked")
  void serialize(Object o) throws IOException {
    if (o == null) {
      buffer.append("null");
      return;
    }
    if (o instanceof String) {
      serializeString((String) o);
      return;
    }
    if (o instanceof Boolean) {
      buffer.append(((Boolean) o).booleanValue());
      return;
    }
    if (o instanceof Double) {
      buffer.append(NumberToJSON.serializeNumber((Double) o));
      return;
    }
    if (o instanceof TreeMap) {
      serializeObject((TreeMap<String, Object>) o);
      return;
    }
    if (o instanceof Vector) {
      serializeArray((Vector<Object>) o);
      return;
    }
    throw new InternalError("Unknown object: " + o);
  }

  /** Writes a JSON object: comma-separated {@code "key":value} pairs in map iteration order. */
  private void serializeObject(TreeMap<String, Object> members) throws IOException {
    buffer.append('{');
    boolean first = true;
    for (Map.Entry<String, Object> member : members.entrySet()) {
      if (!first) {
        buffer.append(',');
      }
      first = false;
      serializeString(member.getKey());
      buffer.append(':');
      serialize(member.getValue());
    }
    buffer.append('}');
  }

  /** Writes a JSON array: comma-separated values in element order. */
  private void serializeArray(Vector<Object> elements) throws IOException {
    buffer.append('[');
    boolean first = true;
    for (Object element : elements) {
      if (!first) {
        buffer.append(',');
      }
      first = false;
      serialize(element);
    }
    buffer.append(']');
  }

  /**
   * Returns the canonical JSON text.
   *
   * @return the current contents of the output buffer
   */
  public String getEncodedString() {
    return buffer.toString();
  }

  /**
   * Returns the canonical JSON text encoded as UTF-8.
   *
   * @return the UTF-8 bytes of {@link #getEncodedString()}
   * @throws IOException declared by the signature; the visible body never throws it
   */
  public byte[] getEncodedUTF8() throws IOException {
    String canonical = getEncodedString();
    return canonical.getBytes(StandardCharsets.UTF_8);
  }
}

/**
 * Small recursive-descent JSON parser.
 *
 * <p>The whole document is parsed in the constructor and the result is left in {@link #root}.
 * Decoded values are represented as follows: objects as {@code TreeMap<String, Object>}, arrays
 * as {@code Vector<Object>}, strings as {@link String}, numbers as {@link Double}, booleans as
 * {@link Boolean} and JSON {@code null} as Java {@code null}.
 */
class JsonDecoder {

  static final char LEFT_CURLY_BRACKET = '{';
  static final char RIGHT_CURLY_BRACKET = '}';
  static final char DOUBLE_QUOTE = '"';
  static final char COLON_CHARACTER = ':';
  static final char LEFT_BRACKET = '[';
  static final char RIGHT_BRACKET = ']';
  static final char COMMA_CHARACTER = ',';
  static final char BACK_SLASH = '\\';

  static final Pattern BOOLEAN_PATTERN = Pattern.compile("true|false");
  // NOTE(review): the integer part is [0-9]+, so tokens with leading zeros such as "01" are
  // accepted here even though RFC 8259 forbids them - confirm whether that leniency is intended.
  static final Pattern NUMBER_PATTERN = Pattern.compile("-?[0-9]+(\\.[0-9]+)?([eE][-+]?[0-9]+)?");

  /** Position of the next unread character in {@link #jsonData}. */
  int index;

  /** Length of {@link #jsonData}; reading at or beyond it is an EOF error. */
  int maxLength;

  /** The text being parsed. */
  String jsonData;

  /** The decoded top-level value: a {@code TreeMap} for an object or a {@code Vector} for an array. */
  Object root;

  /**
   * Parses {@code jsonString} completely.
   *
   * <p>The first non-whitespace character must open an array or an object; after the top-level
   * value only whitespace may follow.
   *
   * @param jsonString the JSON text to parse
   * @throws IOException if the text is empty, malformed, or has trailing non-whitespace content
   */
  JsonDecoder(String jsonString) throws IOException {
    jsonData = jsonString;
    maxLength = jsonData.length();
    if (testNextNonWhiteSpaceChar() == LEFT_BRACKET) {
      scan(); // consume the '[' that was only peeked at
      root = parseArray();
    } else {
      scanFor(LEFT_CURLY_BRACKET);
      root = parseObject();
    }
    // Anything other than whitespace after the top-level value is an error.
    while (index < maxLength) {
      if (!isWhiteSpace(jsonData.charAt(index++))) {
        throw new IOException("Improperly terminated JSON object");
      }
    }
  }

  /**
   * Parses the next value of any type, dispatching on its first non-whitespace character.
   *
   * @return the decoded value (possibly {@code null} for JSON {@code null})
   * @throws IOException on malformed input or premature end of input
   */
  Object parseElement() throws IOException {
    switch (scan()) {
      case LEFT_CURLY_BRACKET:
        return parseObject();

      case DOUBLE_QUOTE:
        return parseQuotedString();

      case LEFT_BRACKET:
        return parseArray();

      default:
        return parseSimpleType();
    }
  }

  /**
   * Parses the members of an object whose opening brace has already been consumed, up to and
   * including the closing brace.
   *
   * @return a {@code TreeMap<String, Object>} holding the members
   * @throws IOException on malformed input, premature end of input, or when a property name
   *     occurs more than once in the same object
   */
  Object parseObject() throws IOException {
    TreeMap<String, Object> dict = new TreeMap<String, Object>();
    boolean next = false;
    while (testNextNonWhiteSpaceChar() != RIGHT_CURLY_BRACKET) {
      if (next) {
        scanFor(COMMA_CHARACTER);
      }
      next = true;
      scanFor(DOUBLE_QUOTE);
      String name = parseQuotedString();
      scanFor(COLON_CHARACTER);
      Object value = parseElement();
      // A JSON null is stored as a Java null, so the return value of put() cannot tell
      // "no previous mapping" apart from "previous value was null". Check the key explicitly so
      // that duplicates are rejected regardless of the earlier value.
      if (dict.containsKey(name)) {
        throw new IOException("Duplicate property: " + name);
      }
      dict.put(name, value);
    }
    scan(); // consume the closing '}'
    return dict;
  }

  /**
   * Parses the elements of an array whose opening bracket has already been consumed, up to and
   * including the closing bracket.
   *
   * @return a {@code Vector<Object>} holding the elements in input order
   * @throws IOException on malformed input or premature end of input
   */
  @SuppressWarnings("JdkObsolete")
  Object parseArray() throws IOException {
    Vector<Object> array = new Vector<Object>();
    boolean next = false;
    while (testNextNonWhiteSpaceChar() != RIGHT_BRACKET) {
      if (next) {
        scanFor(COMMA_CHARACTER);
      } else {
        next = true;
      }
      array.add(parseElement());
    }
    scan(); // consume the closing ']'
    return array;
  }

  /**
   * Parses an unquoted token: a number, {@code true}, {@code false} or {@code null}.
   *
   * <p>The caller has already consumed the token's first character, so the position is stepped
   * back by one before reading. The token ends at whitespace or just before a comma, closing
   * bracket or closing brace.
   *
   * @return a {@link Double}, a {@link Boolean}, or {@code null} for the {@code null} literal
   * @throws IOException if the token is empty or is not a recognized literal, or on premature
   *     end of input
   */
  Object parseSimpleType() throws IOException {
    index--;
    StringBuilder tempBuffer = new StringBuilder();
    char c;
    while ((c = testNextNonWhiteSpaceChar()) != COMMA_CHARACTER
        && c != RIGHT_BRACKET
        && c != RIGHT_CURLY_BRACKET) {
      // The peek above skips whitespace; the real read does not, so whitespace ends the token.
      if (isWhiteSpace(c = nextChar())) {
        break;
      }
      tempBuffer.append(c);
    }
    String token = tempBuffer.toString();
    if (token.length() == 0) {
      throw new IOException("Missing argument");
    }
    if (NUMBER_PATTERN.matcher(token).matches()) {
      return Double.valueOf(token); // Syntax check...
    } else if (BOOLEAN_PATTERN.matcher(token).matches()) {
      return Boolean.valueOf(token);
    } else if (token.equals("null")) {
      return null;
    } else {
      throw new IOException("Unrecognized or malformed JSON token: " + token);
    }
  }

  /**
   * Parses a string whose opening quote has already been consumed, up to and including the
   * closing quote, resolving backslash escapes.
   *
   * @return the unescaped string contents
   * @throws IOException on a raw character below 0x20, an unsupported escape, bad hex digits in
   *     a {@code \\u} escape, or premature end of input
   */
  String parseQuotedString() throws IOException {
    StringBuilder result = new StringBuilder();
    while (true) {
      char c = nextChar();
      if (c < ' ') {
        throw new IOException(
            c == '\n'
                ? "Unterminated string literal"
                : "Unescaped control character: 0x" + Integer.toString(c, 16));
      }
      if (c == DOUBLE_QUOTE) {
        break;
      }
      if (c == BACK_SLASH) {
        switch (c = nextChar()) {
          case '"':
          case '\\':
          case '/':
            // These escapes stand for the character itself.
            break;

          case 'b':
            c = '\b';
            break;

          case 'f':
            c = '\f';
            break;

          case 'n':
            c = '\n';
            break;

          case 'r':
            c = '\r';
            break;

          case 't':
            c = '\t';
            break;

          case 'u':
            // Assemble one UTF-16 code unit from exactly four hex digits.
            c = 0;
            for (int i = 0; i < 4; i++) {
              c = (char) ((c << 4) + getHexChar());
            }
            break;

          default:
            throw new IOException("Unsupported escape:" + c);
        }
      }
      result.append(c);
    }
    return result.toString();
  }

  /**
   * Reads one hexadecimal digit (either case) and returns its numeric value.
   *
   * @return the digit's value, 0 to 15, as a {@code char}
   * @throws IOException if the next character is not a hex digit, or on premature end of input
   */
  char getHexChar() throws IOException {
    char c = nextChar();
    switch (c) {
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return (char) (c - '0');

      case 'a':
      case 'b':
      case 'c':
      case 'd':
      case 'e':
      case 'f':
        return (char) (c - 'a' + 10);

      case 'A':
      case 'B':
      case 'C':
      case 'D':
      case 'E':
      case 'F':
        return (char) (c - 'A' + 10);
    }
    throw new IOException("Bad hex in \\u escape: " + c);
  }

  /**
   * Peeks at the next non-whitespace character without changing the read position.
   *
   * @return the next non-whitespace character
   * @throws IOException if only whitespace (or nothing) remains
   */
  char testNextNonWhiteSpaceChar() throws IOException {
    int save = index;
    char c = scan();
    index = save;
    return c;
  }

  /**
   * Consumes the next non-whitespace character and requires it to be {@code expected}.
   *
   * @param expected the character that must come next
   * @throws IOException if a different character is found, or on premature end of input
   */
  void scanFor(char expected) throws IOException {
    char c = scan();
    if (c != expected) {
      throw new IOException("Expected '" + expected + "' but got '" + c + "'");
    }
  }

  /**
   * Consumes and returns the next character, whitespace included.
   *
   * @return the character at the current position
   * @throws IOException if the end of the input has been reached
   */
  char nextChar() throws IOException {
    if (index < maxLength) {
      return jsonData.charAt(index++);
    }
    throw new IOException("Unexpected EOF reached");
  }

  /** Returns whether {@code c} is a space, line feed, carriage return or horizontal tab. */
  boolean isWhiteSpace(char c) {
    return c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09;
  }

  /**
   * Skips whitespace, then consumes and returns the next character.
   *
   * @return the first non-whitespace character at or after the current position
   * @throws IOException if the end of the input is reached first
   */
  char scan() throws IOException {
    while (true) {
      char c = nextChar();
      if (isWhiteSpace(c)) {
        continue;
      }
      return c;
    }
  }
}