TestJsonExtract.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.operator.scalar;

import com.facebook.presto.common.function.SqlFunctionProperties;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.List;

import static com.facebook.presto.common.type.TimeZoneKey.UTC_KEY;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.operator.scalar.JsonExtract.JsonExtractor;
import static com.facebook.presto.operator.scalar.JsonExtract.JsonValueJsonExtractor;
import static com.facebook.presto.operator.scalar.JsonExtract.ObjectFieldJsonExtractor;
import static com.facebook.presto.operator.scalar.JsonExtract.ScalarValueJsonExtractor;
import static com.facebook.presto.operator.scalar.JsonExtract.generateExtractor;
import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static java.util.Locale.ENGLISH;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;

public class TestJsonExtract
        extends AbstractTestFunctions
{
    public static final SqlFunctionProperties PROPERTIES_CANONICALIZED_EXTRACT_ENABLED =
            SqlFunctionProperties.builder().setTimeZoneKey(UTC_KEY).setLegacyTimestamp(true).setSessionStartTime(0).setSessionLocale(ENGLISH).setSessionUser("user").setCanonicalizedJsonExtract(true).build();

    public static final SqlFunctionProperties PROPERTIES_CANONICALIZED_EXTRACT_DISABLED =
            SqlFunctionProperties.builder().setTimeZoneKey(UTC_KEY).setLegacyTimestamp(true).setSessionStartTime(0).setSessionLocale(ENGLISH).setSessionUser("user").setCanonicalizedJsonExtract(false).build();
    @BeforeClass
    public void setUp()
    {
        // for "utf8" function
        registerScalar(TestStringFunctions.class);
    }

    @Test
    public void testJsonTokenizer()
    {
        assertEquals(tokenizePath("$"), ImmutableList.of());
        assertEquals(tokenizePath("$"), ImmutableList.of());
        assertEquals(tokenizePath("$.foo"), ImmutableList.of("foo"));
        assertEquals(tokenizePath("$[\"foo\"]"), ImmutableList.of("foo"));
        assertEquals(tokenizePath("$[\"foo.bar\"]"), ImmutableList.of("foo.bar"));
        assertEquals(tokenizePath("$[42]"), ImmutableList.of("42"));
        assertEquals(tokenizePath("$.42"), ImmutableList.of("42"));
        assertEquals(tokenizePath("$.42.63"), ImmutableList.of("42", "63"));
        assertEquals(tokenizePath("$.foo.42.bar.63"), ImmutableList.of("foo", "42", "bar", "63"));
        assertEquals(tokenizePath("$.x.foo"), ImmutableList.of("x", "foo"));
        assertEquals(tokenizePath("$.x[\"foo\"]"), ImmutableList.of("x", "foo"));
        assertEquals(tokenizePath("$.x[42]"), ImmutableList.of("x", "42"));
        assertEquals(tokenizePath("$.foo_42._bar63"), ImmutableList.of("foo_42", "_bar63"));
        assertEquals(tokenizePath("$[foo_42][_bar63]"), ImmutableList.of("foo_42", "_bar63"));
        assertEquals(tokenizePath("$.foo:42.:bar63"), ImmutableList.of("foo:42", ":bar63"));
        assertEquals(tokenizePath("$[\"foo:42\"][\":bar63\"]"), ImmutableList.of("foo:42", ":bar63"));

        assertPathToken("foo");

        assertQuotedPathToken("-1.1");
        assertQuotedPathToken("!@#$%^&*()[]{}/?'");
        assertQuotedPathToken("ab\u0001c");
        assertQuotedPathToken("ab\0c");
        assertQuotedPathToken("ab\t\n\rc");
        assertQuotedPathToken(".");
        assertQuotedPathToken("$");
        assertQuotedPathToken("]");
        assertQuotedPathToken("[");
        assertQuotedPathToken("'");
        assertQuotedPathToken("!@#$%^&*(){}[]<>?/|.,`~\r\n\t \0");
        assertQuotedPathToken("a\\\\b\\\"", "a\\b\"");

        // backslash not followed by valid escape
        assertInvalidPath("$[\"a\\ \"]");

        // colon in subscript must be quoted
        assertInvalidPath("$[foo:bar]");

        // whitespace is not allowed
        assertInvalidPath(" $.x");
        assertInvalidPath(" $.x ");
        assertInvalidPath("$. x");
        assertInvalidPath("$ .x");
        assertInvalidPath("$\n.x");
        assertInvalidPath("$.x [42]");
        assertInvalidPath("$.x[ 42]");
        assertInvalidPath("$.x[42 ]");
        assertInvalidPath("$.x[ \"foo\"]");
        assertInvalidPath("$.x[\"foo\" ]");
    }

    private static void assertPathToken(String fieldName)
    {
        assertTrue(fieldName.indexOf('"') < 0);
        assertEquals(tokenizePath("$." + fieldName), ImmutableList.of(fieldName));
        assertEquals(tokenizePath("$.foo." + fieldName + ".bar"), ImmutableList.of("foo", fieldName, "bar"));
        assertPathTokenQuoting(fieldName);
    }

    private static void assertQuotedPathToken(String fieldName)
    {
        assertQuotedPathToken(fieldName, fieldName);
    }

    private static void assertQuotedPathToken(String fieldName, String expectedTokenizedField)
    {
        assertPathTokenQuoting(fieldName, expectedTokenizedField);
        // without quoting we should get an error
        assertInvalidPath("$." + fieldName);
    }

    private static void assertPathTokenQuoting(String fieldName)
    {
        assertPathTokenQuoting(fieldName, fieldName);
    }

    private static void assertPathTokenQuoting(String fieldName, String expectedTokenizedField)
    {
        assertEquals(tokenizePath("$[\"" + fieldName + "\"]"), ImmutableList.of(expectedTokenizedField));
        assertEquals(tokenizePath("$.foo[\"" + fieldName + "\"].bar"), ImmutableList.of("foo", expectedTokenizedField, "bar"));
    }

    public static void assertInvalidPath(String path)
    {
        try {
            tokenizePath(path);
            fail("Expected PrestoException");
        }
        catch (PrestoException e) {
            assertEquals(e.getErrorCode(), INVALID_FUNCTION_ARGUMENT.toErrorCode());
        }
    }

    @Test
    public void testScalarValueJsonExtractor()
            throws Exception
    {
        ScalarValueJsonExtractor extractor = new ScalarValueJsonExtractor();

        // Check scalar values
        assertEquals(doExtractLegacy(extractor, "123"), "123");
        assertEquals(doExtractLegacy(extractor, "-1"), "-1");
        assertEquals(doExtractLegacy(extractor, "0.01"), "0.01");
        assertEquals(doExtractLegacy(extractor, "\"abc\""), "abc");
        assertEquals(doExtractLegacy(extractor, "\"\""), "");
        assertNull(doExtractLegacy(extractor, "null"));
        assertEquals(doCanonicalizedExtract(extractor, "123"), "123");
        assertEquals(doCanonicalizedExtract(extractor, "-1"), "-1");
        assertEquals(doCanonicalizedExtract(extractor, "0.01"), "0.01");
        assertEquals(doCanonicalizedExtract(extractor, "\"abc\""), "abc");
        assertEquals(doCanonicalizedExtract(extractor, "\"\""), "");
        assertNull(doCanonicalizedExtract(extractor, "null"));

        // Test character escaped values
        assertEquals(doExtractLegacy(extractor, "\"ab\\u0001c\""), "ab\001c");
        assertEquals(doExtractLegacy(extractor, "\"ab\\u0002c\""), "ab\002c");
        assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0001c\""), "ab\001c");
        assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0002c\""), "ab\002c");

        // Complex types should return null
        assertNull(doExtractLegacy(extractor, "[1, 2, 3]"));
        assertNull(doExtractLegacy(extractor, "{\"a\": 1}"));
        assertNull(doCanonicalizedExtract(extractor, "[1, 2, 3]"));
        assertNull(doCanonicalizedExtract(extractor, "{\"a\": 1}"));
    }

    @Test
    public void testJsonValueJsonExtractor()
            throws Exception
    {
        JsonValueJsonExtractor extractor = new JsonValueJsonExtractor();

        // Check scalar values
        assertEquals(doExtractLegacy(extractor, "123"), "123");
        assertEquals(doExtractLegacy(extractor, "-1"), "-1");
        assertEquals(doExtractLegacy(extractor, "0.01"), "0.01");
        assertEquals(doExtractLegacy(extractor, "\"abc\""), "\"abc\"");
        assertEquals(doExtractLegacy(extractor, "\"\""), "\"\"");
        assertEquals(doExtractLegacy(extractor, "null"), "null");
        assertEquals(doCanonicalizedExtract(extractor, "123"), "123");
        assertEquals(doCanonicalizedExtract(extractor, "-1"), "-1");
        assertEquals(doCanonicalizedExtract(extractor, "0.01"), "0.01");
        assertEquals(doCanonicalizedExtract(extractor, "\"abc\""), "\"abc\"");
        assertEquals(doCanonicalizedExtract(extractor, "\"\""), "\"\"");
        assertEquals(doCanonicalizedExtract(extractor, "null"), "null");

        // Test character escaped values
        assertEquals(doExtractLegacy(extractor, "\"ab\\u0001c\""), "\"ab\\u0001c\"");
        assertEquals(doExtractLegacy(extractor, "\"ab\\u0002c\""), "\"ab\\u0002c\"");
        assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0001c\""), "\"ab\\u0001c\"");
        assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0002c\""), "\"ab\\u0002c\"");

        // Complex types should return json values
        assertEquals(doExtractLegacy(extractor, "[1, 2, 3]"), "[1,2,3]");
        assertEquals(doExtractLegacy(extractor, "{\"a\": 1}"), "{\"a\":1}");
        assertEquals(doCanonicalizedExtract(extractor, "[1, 2, 3]"), "[1,2,3]");
        assertEquals(doCanonicalizedExtract(extractor, "{\"a\": 1}"), "{\"a\":1}");
    }

    @Test
    public void testArrayElementJsonExtractor()
            throws Exception
    {
        ObjectFieldJsonExtractor<Slice> firstExtractor = new ObjectFieldJsonExtractor<>("0", new ScalarValueJsonExtractor());
        ObjectFieldJsonExtractor<Slice> secondExtractor = new ObjectFieldJsonExtractor<>("1", new ScalarValueJsonExtractor());

        assertNull(doExtractLegacy(firstExtractor, "[]"));
        assertEquals(doExtractLegacy(firstExtractor, "[1, 2, 3]"), "1");
        assertEquals(doExtractLegacy(secondExtractor, "[1, 2]"), "2");
        assertNull(doExtractLegacy(secondExtractor, "[1, null]"));
        assertNull(doCanonicalizedExtract(firstExtractor, "[]"));
        assertEquals(doCanonicalizedExtract(firstExtractor, "[1, 2, 3]"), "1");
        assertEquals(doCanonicalizedExtract(secondExtractor, "[1, 2]"), "2");
        assertNull(doCanonicalizedExtract(secondExtractor, "[1, null]"));
        // Out of bounds
        assertNull(doExtractLegacy(secondExtractor, "[1]"));
        assertNull(doCanonicalizedExtract(secondExtractor, "[1]"));
        // Check skipping complex structures
        assertEquals(doExtractLegacy(secondExtractor, "[{\"a\": 1}, 2, 3]"), "2");
        assertEquals(doCanonicalizedExtract(secondExtractor, "[{\"a\": 1}, 2, 3]"), "2");
    }

    @Test
    public void testObjectFieldJsonExtractor()
            throws Exception
    {
        ObjectFieldJsonExtractor<Slice> extractor = new ObjectFieldJsonExtractor<>("fuu", new ScalarValueJsonExtractor());

        assertNull(doExtractLegacy(extractor, "{}"));
        assertNull(doExtractLegacy(extractor, "{\"a\": 1}"));
        assertEquals(doExtractLegacy(extractor, "{\"fuu\": 1}"), "1");
        assertEquals(doExtractLegacy(extractor, "{\"a\": 0, \"fuu\": 1}"), "1");
        assertNull(doCanonicalizedExtract(extractor, "{}"));
        assertNull(doCanonicalizedExtract(extractor, "{\"a\": 1}"));
        assertEquals(doCanonicalizedExtract(extractor, "{\"fuu\": 1}"), "1");
        assertEquals(doCanonicalizedExtract(extractor, "{\"a\": 0, \"fuu\": 1}"), "1");
        // Check skipping complex structures
        assertEquals(doCanonicalizedExtract(extractor, "{\"a\": [1, 2, 3], \"fuu\": 1}"), "1");
    }

    @Test
    public void testFullScalarExtract()
    {
        assertNull(doScalarExtractLegacy("{}", "$"));
        assertNull(doScalarExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu")); // Null b/c value is complex type
        assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$.fuu"), "1");
        assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$[fuu]"), "1");
        assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertNull(doScalarExtractLegacy("{\"fuu\": null}", "$.fuu"));
        assertNull(doScalarExtractLegacy("{\"fuu\": 1}", "$.bar"));
        assertEquals(doScalarExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\001"); // Test escaped characters
        assertEquals(doScalarExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "abc");
        assertEquals(doScalarExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1");
        assertNull(doScalarExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]")); // Null b/c value is complex type
        assertEquals(doScalarExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101");
        assertEquals(doScalarExtractLegacy("{\"fuu\": [0, {\"bar\": {\"key\" : [\"value\"]}}, 2]}", "$.fuu[1].bar.key[0]"), "value");

        assertNull(doScalarCanonicalizedExtract("{}", "$"));
        assertNull(doScalarCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu")); // Null b/c value is complex type
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$.fuu"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$[fuu]"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertNull(doScalarCanonicalizedExtract("{\"fuu\": null}", "$.fuu"));
        assertNull(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$.bar"));
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\001"); // Test escaped characters
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "abc");
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1");
        assertNull(doScalarCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]")); // Null b/c value is complex type
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101");
        assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0, {\"bar\": {\"key\" : [\"value\"]}}, 2]}", "$.fuu[1].bar.key[0]"), "value");

        // Test non-object extraction
        assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[0]"), "0");
        assertEquals(doScalarExtractLegacy("\"abc\"", "$"), "abc");
        assertEquals(doScalarExtractLegacy("123", "$"), "123");
        assertNull(doScalarExtractLegacy("null", "$"));

        assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[0]"), "0");
        assertEquals(doScalarCanonicalizedExtract("\"abc\"", "$"), "abc");
        assertEquals(doScalarCanonicalizedExtract("123", "$"), "123");
        assertNull(doScalarCanonicalizedExtract("null", "$"));

        // Test numeric path expression matches arrays and objects
        assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$.1"), "1");
        assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[1]"), "1");
        assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[\"1\"]"), "1");
        assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1");
        assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1");
        assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1");

        assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$.1"), "1");
        assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[1]"), "1");
        assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[\"1\"]"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1");

        // Test fields starting with a digit
        assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1");
        assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1");
        assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1");

        assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1");
        assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1");
    }

    @Test
    public void testFullJsonExtract()
    {
        assertEquals(doJsonExtractLegacy("{}", "$"), "{}");
        assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu"), "{\"bar\":1}");
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$.fuu"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[fuu]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": null}", "$.fuu"), "null");
        assertNull(doJsonExtractLegacy("{\"fuu\": 1}", "$.bar"));
        assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "\"abc\"");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), "[100,101]");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101");

        assertEquals(doJsonCanonicalizedExtract("{}", "$"), "{}");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu"), "{\"bar\":1}");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$.fuu"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[fuu]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": null}", "$.fuu"), "null");
        assertNull(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$.bar"));
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "\"abc\"");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), "[100,101]");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101");

        // Test non-object extraction
        assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[0]"), "0");
        assertEquals(doJsonExtractLegacy("\"abc\"", "$"), "\"abc\"");
        assertEquals(doJsonExtractLegacy("123", "$"), "123");
        assertEquals(doJsonExtractLegacy("null", "$"), "null");

        assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[0]"), "0");
        assertEquals(doJsonCanonicalizedExtract("\"abc\"", "$"), "\"abc\"");
        assertEquals(doJsonCanonicalizedExtract("123", "$"), "123");
        assertEquals(doJsonCanonicalizedExtract("null", "$"), "null");

        // Test extraction using bracket json path
        assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"]"), "{\"bar\":1}");
        assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"][\"bar\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": null}", "$[\"fuu\"]"), "null");
        assertNull(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"bar\"]"));
        assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$[\"bar\"]"), "\"abc\"");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$[\"fuu\"][0]"), "0.1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1]"), "[100,101]");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1][1]"), "101");

        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"]"), "{\"bar\":1}");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"][\"bar\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": null}", "$[\"fuu\"]"), "null");
        assertNull(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"bar\"]"));
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$[\"bar\"]"), "\"abc\"");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$[\"fuu\"][0]"), "0.1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1]"), "[100,101]");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1][1]"), "101");

        // Test extraction using bracket json path with special json characters in path
        assertEquals(doJsonExtractLegacy("{\"@$fuu\": {\".b.ar\": 1}}", "$[\"@$fuu\"]"), "{\".b.ar\":1}");
        assertEquals(doJsonExtractLegacy("{\"fuu..\": 1}", "$[\"fuu..\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fu*u\": null}", "$[\"fu*u\"]"), "null");
        assertNull(doJsonExtractLegacy("{\",fuu\": 1}", "$[\"bar\"]"));
        assertEquals(doJsonExtractLegacy("{\",fuu\": [\"\\u0001\"]}", "$[\",fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonExtractLegacy("{\":fu:u:\": 1, \":b:ar:\": \"abc\"}", "$[\":b:ar:\"]"), "\"abc\"");
        assertEquals(doJsonExtractLegacy("{\"?()fuu\": [0.1, 1, 2]}", "$[\"?()fuu\"][0]"), "0.1");
        assertEquals(doJsonExtractLegacy("{\"f?uu\": [0, [100, 101], 2]}", "$[\"f?uu\"][1]"), "[100,101]");
        assertEquals(doJsonExtractLegacy("{\"fuu()\": [0, [100, 101], 2]}", "$[\"fuu()\"][1][1]"), "101");

        assertEquals(doJsonCanonicalizedExtract("{\"@$fuu\": {\".b.ar\": 1}}", "$[\"@$fuu\"]"), "{\".b.ar\":1}");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu..\": 1}", "$[\"fuu..\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fu*u\": null}", "$[\"fu*u\"]"), "null");
        assertNull(doJsonCanonicalizedExtract("{\",fuu\": 1}", "$[\"bar\"]"));
        assertEquals(doJsonCanonicalizedExtract("{\",fuu\": [\"\\u0001\"]}", "$[\",fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonCanonicalizedExtract("{\":fu:u:\": 1, \":b:ar:\": \"abc\"}", "$[\":b:ar:\"]"), "\"abc\"");
        assertEquals(doJsonCanonicalizedExtract("{\"?()fuu\": [0.1, 1, 2]}", "$[\"?()fuu\"][0]"), "0.1");
        assertEquals(doJsonCanonicalizedExtract("{\"f?uu\": [0, [100, 101], 2]}", "$[\"f?uu\"][1]"), "[100,101]");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu()\": [0, [100, 101], 2]}", "$[\"fuu()\"][1][1]"), "101");

        // Test extraction using mix of bracket and dot notation json path
        assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"].bar"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu[\"bar\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters

        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"].bar"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu[\"bar\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters
        assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters

        // Test extraction using  mix of bracket and dot notation json path with special json characters in path
        assertEquals(doJsonExtractLegacy("{\"@$fuu\": {\"bar\": 1}}", "$[\"@$fuu\"].bar"), "1");
        assertEquals(doJsonExtractLegacy("{\",fuu\": {\"bar\": [\"\\u0001\"]}}", "$[\",fuu\"].bar[0]"), "\"\\u0001\""); // Test escaped characters

        assertEquals(doJsonCanonicalizedExtract("{\"@$fuu\": {\"bar\": 1}}", "$[\"@$fuu\"].bar"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\",fuu\": {\"bar\": [\"\\u0001\"]}}", "$[\",fuu\"].bar[0]"), "\"\\u0001\""); // Test escaped characters

        // Test numeric path expression matches arrays and objects
        assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$.1"), "1");
        assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[1]"), "1");
        assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[\"1\"]"), "1");
        assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1");
        assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1");
        assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1");

        assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$.1"), "1");
        assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[1]"), "1");
        assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[\"1\"]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1");

        // Test fields starting with a digit
        assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1");
        assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1");
        assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1");

        assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1");
        assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1");
    }

    @Test
    public void testInvalidExtracts()
    {
        assertInvalidLegacyExtract("", "", "Invalid JSON path: ''");
        assertInvalidLegacyExtract("{}", "$.bar[2][-1]", "Invalid JSON path: '$.bar[2][-1]'");
        assertInvalidLegacyExtract("{}", "$.fuu..bar", "Invalid JSON path: '$.fuu..bar'");
        assertInvalidLegacyExtract("{}", "$.", "Invalid JSON path: '$.'");
        assertInvalidLegacyExtract("", "$$", "Invalid JSON path: '$$'");
        assertInvalidLegacyExtract("", " ", "Invalid JSON path: ' '");
        assertInvalidLegacyExtract("", ".", "Invalid JSON path: '.'");
        assertInvalidLegacyExtract("{ \"store\": { \"book\": [{ \"title\": \"title\" }] } }", "$.store.book[", "Invalid JSON path: '$.store.book['");

        assertInvalidCanonicalizedExtract("", "", "Invalid JSON path: ''");
        assertInvalidCanonicalizedExtract("{}", "$.bar[2][-1]", "Invalid JSON path: '$.bar[2][-1]'");
        assertInvalidCanonicalizedExtract("{}", "$.fuu..bar", "Invalid JSON path: '$.fuu..bar'");
        assertInvalidCanonicalizedExtract("{}", "$.", "Invalid JSON path: '$.'");
        assertInvalidCanonicalizedExtract("", "$$", "Invalid JSON path: '$$'");
        assertInvalidCanonicalizedExtract("", " ", "Invalid JSON path: ' '");
        assertInvalidCanonicalizedExtract("", ".", "Invalid JSON path: '.'");
        assertInvalidCanonicalizedExtract("{ \"store\": { \"book\": [{ \"title\": \"title\" }] } }", "$.store.book[", "Invalid JSON path: '$.store.book['");
    }

    @Test
    public void testNoAutomaticEncodingDetection()
    {
        // Automatic encoding detection treats the following input as UTF-32
        assertFunction("JSON_EXTRACT_SCALAR(UTF8(X'00 00 00 00 7b 22 72 22'), '$.x')", VARCHAR, null);
    }

    private static String doExtractLegacy(JsonExtractor<Slice> jsonExtractor, String json)
            throws IOException
    {
        Slice extract = jsonExtractor.extract(Slices.utf8Slice(json).getInput(), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED);
        return (extract == null) ? null : extract.toStringUtf8();
    }

    private static String doCanonicalizedExtract(JsonExtractor<Slice> jsonExtractor, String json)
            throws IOException
    {
        Slice extract = jsonExtractor.extract(Slices.utf8Slice(json).getInput(), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED);
        return (extract == null) ? null : extract.toStringUtf8();
    }

    private static String doScalarExtractLegacy(String inputJson, String jsonPath)
    {
        Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new ScalarValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED);
        return (value == null) ? null : value.toStringUtf8();
    }

    private static String doScalarCanonicalizedExtract(String inputJson, String jsonPath)
    {
        Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new ScalarValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED);
        return (value == null) ? null : value.toStringUtf8();
    }

    private static String doJsonExtractLegacy(String inputJson, String jsonPath)
    {
        Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED);
        return (value == null) ? null : value.toStringUtf8();
    }

    private static String doJsonCanonicalizedExtract(String inputJson, String jsonPath)
    {
        Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED);
        return (value == null) ? null : value.toStringUtf8();
    }

    private static List<String> tokenizePath(String path)
    {
        return ImmutableList.copyOf(new JsonPathTokenizer(path));
    }

    private static void assertInvalidLegacyExtract(String inputJson, String jsonPath, String message)
    {
        try {
            doJsonExtractLegacy(inputJson, jsonPath);
        }
        catch (PrestoException e) {
            assertEquals(e.getErrorCode(), INVALID_FUNCTION_ARGUMENT.toErrorCode());
            assertEquals(e.getMessage(), message);
        }
    }

    private static void assertInvalidCanonicalizedExtract(String inputJson, String jsonPath, String message)
    {
        try {
            doJsonCanonicalizedExtract(inputJson, jsonPath);
        }
        catch (PrestoException e) {
            assertEquals(e.getErrorCode(), INVALID_FUNCTION_ARGUMENT.toErrorCode());
            assertEquals(e.getMessage(), message);
        }
    }
}