// JsonSanitizerTest.java

// Copyright (C) 2012 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.json;

import static com.google.json.JsonSanitizer.DEFAULT_NESTING_DEPTH;
import static com.google.json.JsonSanitizer.sanitize;

import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import junit.framework.TestCase;
import org.junit.Test;

@SuppressWarnings("javadoc")
public final class JsonSanitizerTest extends TestCase {

  /**
   * Checks that sanitizing {@code input} with the default nesting depth yields {@code golden}.
   *
   * @param golden the expected sanitizer output
   * @param input the raw text handed to the sanitizer
   */
  private static void assertSanitized(String golden, String input) {
    final int depth = DEFAULT_NESTING_DEPTH;
    assertSanitized(golden, input, depth);
  }

  /**
   * Sanitizes {@code input} with the given nesting limit and checks the result against
   * {@code golden}.
   *
   * <p>When the output is equal to the input, the output must additionally be the very same
   * {@code String} instance as the input.
   *
   * @param golden the expected sanitizer output
   * @param input the raw text handed to the sanitizer; also used as the assertion message
   * @param maximumNestingDepth the nesting limit passed through to {@code sanitize}
   */
  private static void assertSanitized(String golden, String input, int maximumNestingDepth) {
    final String sanitized = sanitize(input, maximumNestingDepth);
    assertEquals(input, golden, sanitized);
    if (!sanitized.equals(input)) {
      // The sanitizer produced different text, so there is no identity to check.
      return;
    }
    assertSame(input, input, sanitized);
  }

  /**
   * Checks that {@code sanitary} passes through the sanitizer as-is, using the default nesting
   * depth: the same string serves as both the input and the expected output.
   *
   * @param sanitary text that is expected to need no changes
   */
  private static void assertSanitized(String sanitary) {
    assertSanitized(sanitary, sanitary, DEFAULT_NESTING_DEPTH);
  }

  /**
   * Runs the sanitizer over a broad table of expected-output / input pairs: keywords and
   * whitespace, quoted strings, HTML-sensitive substrings, arrays, objects, comments, numbers,
   * numeric object keys, and characters that need escaping or eliding.
   */
  @Test
  public static final void testSanitize() {
    // On the left is the sanitized output, and on the right the input.
    // If there is a single string, then the input is fine as-is.
    assertSanitized("null", null);
    assertSanitized("null", "");
    assertSanitized("null");
    assertSanitized("false");
    assertSanitized("true");
    assertSanitized(" false ");
    assertSanitized("  false");
    assertSanitized("false\n");
    assertSanitized("false", "false,true");
    assertSanitized("\"foo\"");
    assertSanitized("\"foo\"", "'foo'");
    // Substrings that are significant to an HTML or XML parser inside string literals.
    assertSanitized(
        "\"\\u003cscript>foo()\\u003c/script>\"", "\"<script>foo()</script>\"");
    assertSanitized("\"\\u003c/SCRIPT\\n>\"", "\"</SCRIPT\n>\"");
    assertSanitized("\"\\u003c/ScRIpT\"", "\"</ScRIpT\"");
    // \u0130 is a Turkish dotted upper-case 'I' so the lower case version of
    // the tag name is "script".
    assertSanitized("\"\\u003c/ScR\u0130pT\"", "\"</ScR\u0130pT\"");
    assertSanitized("\"<b>Hello</b>\"");
    assertSanitized("\"<s>Hello</s>\"");
    assertSanitized("\"<[[\\u005d]>\"", "'<[[]]>'");
    assertSanitized("\"\\u005d]>\"", "']]>'");
    // Arrays: trailing junk, trailing and doubled commas, missing commas.
    assertSanitized("[[0]]", "[[0]]>");
    assertSanitized("[1,-1,0.0,-0.5,1e2]", "[1,-1,0.0,-0.5,1e2,");
    assertSanitized("[1,2,3]", "[1,2,3,]");
    assertSanitized("[1,null,3]", "[1,,3,]");
    assertSanitized("[1 ,2 ,3]", "[1 2 3]");
    // Objects: trailing commas, wrong separators, unquoted keys, unclosed values.
    assertSanitized("{ \"foo\": \"bar\" }");
    assertSanitized("{ \"foo\": \"bar\" }", "{ \"foo\": \"bar\", }");
    assertSanitized("{\"foo\":\"bar\"}", "{\"foo\",\"bar\"}");
    assertSanitized("{ \"foo\": \"bar\" }", "{ foo: \"bar\" }");
    assertSanitized("{ \"foo\": \"bar\"}", "{ foo: 'bar");
    assertSanitized("{ \"foo\": [\"bar\"]}", "{ foo: ['bar");
    // Line and block comments, including unterminated ones.
    assertSanitized("false", "// comment\nfalse");
    assertSanitized("false", "false// comment");
    assertSanitized("false", "false// comment\n");
    assertSanitized("false", "false/* comment */");
    assertSanitized("false", "false/* comment *");
    assertSanitized("false", "false/* comment ");
    assertSanitized("false", "/*/true**/false");
    // Numbers as values.
    assertSanitized("1");
    assertSanitized("-1");
    assertSanitized("1.0");
    assertSanitized("-1.0");
    assertSanitized("1.05");
    assertSanitized("427.0953333");
    assertSanitized("6.0221412927e+23");
    assertSanitized("6.0221412927e23");
    assertSanitized("6.0221412927e0", "6.0221412927e");
    assertSanitized("6.0221412927e-0", "6.0221412927e-");
    assertSanitized("6.0221412927e+0", "6.0221412927e+");
    assertSanitized("1.660538920287695E-24");
    assertSanitized("-6.02e-23");
    assertSanitized("1.0", "1.");
    assertSanitized("0.5", ".5");
    assertSanitized("-0.5", "-.5");
    assertSanitized("0.5", "+.5");
    assertSanitized("0.5e2", "+.5e2");
    assertSanitized("1.5e+2", "+1.5e+2");
    assertSanitized("0.5e-2", "+.5e-2");
    // Numbers as object keys are expected to come out as quoted strings.
    assertSanitized("{\"0\":0}", "{0:0}");
    assertSanitized("{\"0\":0}", "{-0:0}");
    assertSanitized("{\"0\":0}", "{+0:0}");
    assertSanitized("{\"1\":0}", "{1.0:0}");
    assertSanitized("{\"1\":0}", "{1.:0}");
    assertSanitized("{\"0.5\":0}", "{.5:0}");
    assertSanitized("{\"-0.5\":0}", "{-.5:0}");
    assertSanitized("{\"0.5\":0}", "{+.5:0}");
    assertSanitized("{\"50\":0}", "{+.5e2:0}");
    assertSanitized("{\"150\":0}", "{+1.5e+2:0}");
    assertSanitized("{\"0.1\":0}", "{+.1:0}");
    assertSanitized("{\"0.01\":0}", "{+.01:0}");
    assertSanitized("{\"0.005\":0}", "{+.5e-2:0}");
    assertSanitized("{\"1e+101\":0}", "{10e100:0}");
    assertSanitized("{\"1e-99\":0}", "{10e-100:0}");
    assertSanitized("{\"1.05e-99\":0}", "{10.5e-100:0}");
    assertSanitized("{\"1.05e-99\":0}", "{10.500e-100:0}");
    assertSanitized("{\"1.234e+101\":0}", "{12.34e100:0}");
    assertSanitized("{\"1.234e-102\":0}", "{.01234e-100:0}");
    // NOTE(review): exact duplicate of the assertion directly above; possibly a different
    // input was intended here -- confirm.
    assertSanitized("{\"1.234e-102\":0}", "{.01234e-100:0}");
    assertSanitized("{}");
    // Remove grouping parentheses.
    assertSanitized("{}", "({})");
    // Escape code-points and isolated surrogates which are not XML embeddable.
    assertSanitized("\"\\u0000\\u0008\\u001f\"", "'\u0000\u0008\u001f'");
    assertSanitized("\"\ud800\udc00\\udc00\\ud800\"",
                    "'\ud800\udc00\udc00\ud800'");
    assertSanitized("\"\ufffd\\ufffe\\uffff\"", "'\ufffd\ufffe\uffff'");
    // These control characters should be elided if they appear outside a string
    // literal.
    assertSanitized("42", "\uffef\u000042\u0008\ud800\uffff\udc00");
    assertSanitized("null", "\uffef\u0000\u0008\ud800\uffff\udc00");
    assertSanitized("[null]", "[,]");
    assertSanitized("[null]", "[null,]");
    assertSanitized("{\"a\":0,\"false\":\"x\",\"\":{\"\":-1}}",
                    "{\"a\":0,false\"x\":{\"\":-1}}");
    assertSanitized("[true ,false]", "[true false]");
    assertSanitized("[\"\\u00a0\\u1234\"]");
    assertSanitized("{\"a\\b\":\"c\"}", "{a\\b\"c");
    assertSanitized("{\"a\":\"b\",\"c\":null}", "{\"a\":\"b\",\"c\":");
    assertSanitized(
        "{\"1e0001234567890123456789123456789123456789\":0}",
        // Exponent way out of representable range in a JS double.
        "{1e0001234567890123456789123456789123456789:0}"
                    );
    // Our octal recoder interprets an octal-like literal that includes a digit '8' or '9' as
    // decimal.
    assertSanitized("-16923547559", "-016923547559");
  }

  /**
   * Regression test for issue 3: malformed inputs that used to trigger index-out-of-bounds and
   * assertion errors must now sanitize to a well-formed result.
   */
  @Test
  public static final void testIssue3() {
    // Both inputs are expected to produce the same output.
    final String golden = "[{\"\":{}}]";
    final String[] inputs = {
      "[{{},\u00E4",
      "[{{\u00E4\u00E4},\u00E4",
    };
    for (String input : inputs) {
      assertSanitized(golden, input);
    }
  }

  /** Regression test for issue 4: bare words must come out as quoted strings. */
  @Test
  public static final void testIssue4() {
    // Each row is { expected output, input }.
    final String[][] cases = {
      {"\"dev\"", "dev"},
      {"\"eval\"", "eval"},
      {"\"comment\"", "comment"},
      {"\"fasle\"", "fasle"},
      {"\"FALSE\"", "FALSE"},
      {"\"dev/comment\"", "dev/comment"},
      {"\"devcomment\"", "dev\\comment"},
      {"\"dev\\ncomment\"", "dev\\ncomment"},
      {"[\"dev\", \"comment\"]", "[dev\\, comment]"},
    };
    for (String[] row : cases) {
      assertSanitized(row[0], row[1]);
    }
  }

  /**
   * Verifies the nesting limit: input nested one level deeper than {@code DEFAULT_NESTING_DEPTH}
   * must fail with an {@link ArrayIndexOutOfBoundsException} under the default limit, and must
   * sanitize cleanly once the limit is raised by one.
   *
   * <p>The fixture is generated from {@code DEFAULT_NESTING_DEPTH} instead of being spelled out
   * as a long brace literal, so it stays exactly one level over the default limit even if that
   * constant changes.
   */
  @Test
  public static final void testMaximumNestingLevel() {
    // One object more than the default limit allows.
    final int depth = DEFAULT_NESTING_DEPTH + 1;
    // Input: depth opening braces followed by depth closing braces.
    String nestedMaps = repeat("{", depth) + repeat("}", depth);
    // Expected output: every inner object becomes the value of an empty-string key.
    String sanitizedNestedMaps = repeat("{\"\":", depth - 1) + "{" + repeat("}", depth);

    try {
      assertSanitized(sanitizedNestedMaps, nestedMaps, DEFAULT_NESTING_DEPTH);
      // Reaching this line means the limit was not enforced. The failure thrown here is an
      // Error, so the catch clause below does not swallow it.
      fail("Expecting failure for too nested JSON");
    } catch (ArrayIndexOutOfBoundsException e) {
      Logger.getAnonymousLogger().log(Level.FINEST, "Expected exception in testing maximum nesting level", e);
    }
    assertSanitized(sanitizedNestedMaps, nestedMaps, depth);
  }

  /** Returns {@code unit} concatenated with itself {@code count} times. */
  private static String repeat(String unit, int count) {
    StringBuilder sb = new StringBuilder(unit.length() * count);
    for (int i = 0; i < count; i++) {
      sb.append(unit);
    }
    return sb.toString();
  }

  /**
   * Checks the nesting depth reported by a sanitizer built with extreme requested depths:
   * {@code Integer.MIN_VALUE} must be reported as 1 and {@code Integer.MAX_VALUE} as
   * {@code JsonSanitizer.MAXIMUM_NESTING_DEPTH}.
   */
  @Test
  public static final void testMaximumNestingLevelAssignment() {
    final JsonSanitizer tooShallow = new JsonSanitizer("", Integer.MIN_VALUE);
    assertEquals(1, tooShallow.getMaximumNestingDepth());

    final JsonSanitizer tooDeep = new JsonSanitizer("", Integer.MAX_VALUE);
    assertEquals(JsonSanitizer.MAXIMUM_NESTING_DEPTH, tooDeep.getMaximumNestingDepth());
  }

  /** Regression test: a closing bracket with no matching open bracket after a number. */
  @Test
  public static final void testUnopenedArray() {
    // Discovered by fuzzer with seed -Dfuzz.seed=df3b4778ce54d00a
    final String input = "\ufeff-01742461140214282]";
    final String golden = "-1742461140214282";
    assertSanitized(golden, input);
  }

  /**
   * Regression test for issue 13: this already well-formed document must pass through the
   * sanitizer as-is.
   */
  @Test
  public static final void testIssue13() {
    // The single-argument helper uses the same string as both expected output and input.
    assertSanitized("[ { \"description\": \"aa##############aa\" }, 1 ]");
  }

  /**
   * Checks how the sanitizer treats substrings that can change an HTML parser's state
   * (script open tags and HTML comment delimiters), both inside string literals and as bare
   * input.
   */
  @Test
  public static final void testHtmlParserStateChanges() {
    // Each row is { expected output, input }.
    final String[][] cases = {
      {"\"\\u003cscript\"", "\"<script\""},
      {"\"\\u003cScript\"", "\"<Script\""},
      // \u0130 is a Turkish dotted upper-case 'I' so the lower case version of
      // the tag name is "script".
      {"\"\\u003cScR\u0130pT\"", "\"<ScR\u0130pT\""},
      {"\"\\u003cSCRIPT\\n>\"", "\"<SCRIPT\n>\""},
      {"\"script\"", "<script"},

      // HTML comment open.
      {"\"\\u003c!--\"", "\"<!--\""},
      {"-0", "<!--"},

      // HTML comment close.
      {"\"--\\u003e\"", "\"-->\""},
      {"-0", "-->"},

      {"\"\\u003c!--\\u003cscript>\"", "\"<!--<script>\""},
    };
    for (String[] row : cases) {
      assertSanitized(row[0], row[1]);
    }
  }

  /**
   * Regression test: a long octal-looking number made of digits that are not valid in octal
   * is expected to come out with only its leading zero removed.
   */
  @Test
  public static final void testLongOctalNumberWithBadDigits() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String input = "-0888888888888888888888";
    final String want = "-888888888888888888888";
    final String got = sanitize(input);
    assertEquals(want, got);
  }

  /**
   * Regression test: deeply nested, unclosed input containing a backslash-x escape in key
   * position and a long octal-looking number.
   */
  @Test
  public static final void testLongNumberInUnclosedInputWithU80() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String input = "{{{{{{{\\x80{{([-053333333304233333333333";
    final String want =
        "{\"\":{\"\":{\"\":{\"\":{\"\":{\"\":{\"x80\":{\"\":{\"\":[-400557869725698078427]}}}}}}}}}";
    final String got = sanitize(input);
    assertEquals(want, got);
  }

  /** Regression test: a backslash followed by the digit 4 inside a bare word. */
  @Test
  public static final void testSlashFour() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String input = "y\\4"; // "y\4"
    final String want = "\"y\\u0004\"";
    assertEquals(want, sanitize(input));
  }

  /** Regression test: fuzzer-generated input with unterminated, interleaved objects. */
  @Test
  public static final void testUnterminatedObject() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String fuzzed = "?\u0000\u0000\u0000{{\u0000\ufffd\u0003]ve{R]\u00000\ufffd\u0016&e{\u0003]\ufffda<!.b<!<!cc1x\u0000\u00005{281<\u0000.{t\u0001\ufffd5\ufffd{5\ufffd\ufffd0\ufffd15\r\ufffd\u0000\u0000\u0000~~-0081273222428822883223759,55\ufffd\u0000\ufffd\t\u0000\ufffd";
    final String expected = "{\"\":{},\"ve\":{\"R\":null},\"0\":\"e\",\"\":{},\"a<!.b<!<!cc1x\":5,\"\":{\"281\":0.0,\"\":{\"t\":5,\"\":{\"5\":0,\"15\"\r:-81273222428822883223759,\"55\"\t:null}}}}";
    assertEquals(expected, sanitize(fuzzed));
  }

  /** Regression test: fuzzer-generated input recorded under the name "crash 1". */
  @Test
  public static final void testCrash1() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String fuzzed = "?\u0000\u0000\u0000{{\u0000\ufffd\u0003]ve{R]\u00000\ufffd\ufffd\u0016&e{\u0003]\ufffda<!.b<!<!c\u00005{281<\u0000.{t\u0001\ufffd5\ufffd{515\r[\u0000\u0000\u0000~~-008127322242\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd23759,551x\u0000\u00006{281<\u0000.{t\u0001\ufffd5\ufffd{5\ufffd\ufffd0\ufffd15\r[\u0000\u0000\u0000~~-0081273222428822883223759,\ufffd";
    final String expected = "{\"\":{},\"ve\":{\"R\":null},\"0\":\"e\",\"\":{},\"a<!.b<!<!c\":5,\"\":{\"281\":0.0,\"\":{\"t\":5,\"\":{\"515\"\r:[-8127322242,23759,551,6,{\"281\":0.0,\"\":{\"t\":5,\"\":{\"5\":0,\"15\"\r:[-81273222428822883223759]}}}]}}}}";
    assertEquals(expected, sanitize(fuzzed));
  }

  /**
   * Checks that, for a set of inputs which try to hide dangerous sequences behind backslash
   * escapes, the lower-cased sanitizer output contains none of the disallowed substrings.
   */
  @Test
  public static final void testDisallowedSubstrings() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String[] inputs = {
            "x<\\script>",
            "x</\\script>",
            "x</sc\\ript>",
            "x<\\163cript>",
            "x</\\163cript>",
            "x<\\123cript>",
            "x</\\123cript>",
            "u\\u\\uu\ufffd\ufffd\\u7u\\u\\u\\u\ufffdu<\\script>5",
            "z\\<\\!--",
            "z\\<!\\--",
            "z\\<!-\\-",
            "z\\<\\!--",
            "\"\\]]\\>",
    };
    // Checked against the lower-cased output, hence all lower case here.
    final String[] forbidden = {
            "<!--",
            "-->",
            "<script",
            "</script",
            "]]>",
            "<![cdata[",
    };
    for (String input : inputs) {
      final String lowered = sanitize(input).toLowerCase(Locale.ROOT);
      for (String needle : forbidden) {
        assertFalse(lowered, lowered.contains(needle));
      }
    }
  }

  /**
   * Regression test: a script-injection payload obfuscated with stray backslashes must come
   * out as a single string literal with the script tag brackets escaped.
   */
  @Test
  public static final void testXssPayload() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String payload = "x</\\script>u\\u\\uu\ufffd\ufffd\\u7u\\u\\u\\u\ufffdu<\\script>5+alert(1)//";
    final String want =
            "\"x\\u003c/script>uuuu\uFFFD\uFFFDu7uuuu\uFFFDu\\u003cscript>5+alert(1)//\"";
    final String got = sanitize(payload);
    assertEquals(want, got);
  }

  /** Regression test: fuzzer-generated input recorded under the name "invalid output". */
  @Test
  public static final void testInvalidOutput() {
    // Found by Fabian Meumertzheim using CI Fuzz (https://www.code-intelligence.com)
    final String fuzzed = "\u0010{'\u0000\u0000'\"\u0000\"{.\ufffd-0X29295909049550970,\n\n0";
    final String expected = "{\"\\u0000\\u0000\":\"\\u0000\",\"\":{\"0\":-47455995597866469744,\n\n\"0\":null}}";
    assertEquals(expected, sanitize(fuzzed));
  }

  /** Regression test: a malformed hex-like number surrounded by non-ASCII characters. */
  @Test
  public static final void testBadNumber() {
    // NOTE(review): the input literal contains U+FFFD replacement characters, which may be the
    // residue of an encoding round trip of this source file -- confirm against the intended
    // original characters.
    final String malformed = "��0x.\\���4\\��906";
    final String expected = "0.0";
    assertEquals(expected, sanitize(malformed));
  }

  /**
   * Checks that a backslash placed inside an HTML comment close sequence does not let that
   * sequence through, and that sanitizing the output a second time leaves it unchanged.
   */
  @Test
  public static final void testDashDashGtEscaped() {
    final String expected = "\"->??--\\u003e\"";

    final String firstPass = sanitize("'->??-\\->");
    assertEquals(expected, firstPass);

    // Feeding the output back in must be a no-op.
    final String secondPass = sanitize(firstPass);
    assertEquals(expected, secondPass);
  }

  /**
   * Checks that an HTML comment close sequence whose first dash is written as a Unicode escape
   * gets its closing angle bracket escaped, and that sanitizing the output a second time leaves
   * it unchanged.
   */
  @Test
  public static final void testDashDashGtUEscaped() {
    final String expected = "\".\\u002D-\\u003e\"";

    final String firstPass = sanitize("'.\\u002D->'");
    assertEquals(expected, firstPass);

    // Feeding the output back in must be a no-op.
    final String secondPass = sanitize(firstPass);
    assertEquals(expected, secondPass);
  }

  /**
   * Checks that a bare word ending in two dashes and a backslash-escaped closing angle bracket
   * comes out with the bracket escaped, and that sanitizing the output a second time leaves it
   * unchanged.
   */
  @Test
  public static final void testEscHtmlCommentClose() {
    final String expected = "\"x--\\u003e\"";

    final String firstPass = sanitize("x--\\>");
    assertEquals(expected, firstPass);

    // Feeding the output back in must be a no-op.
    final String secondPass = sanitize(firstPass);
    assertEquals(expected, secondPass);
  }
}