UTF8NamesParseTest.java

package com.fasterxml.jackson.core.read;


import java.io.*;
import java.util.Random;

import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.io.SerializedString;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Set of basic unit tests for verifying that the basic parser
 * functionality works as expected.
 */
public class UTF8NamesParseTest
    extends JUnit5TestBase
{
    final static String[] UTF8_2BYTE_STRINGS = new String[] {
        // This may look funny, but UTF8 scanner has fairly elaborate decoding
        // machinery, and it is indeed necessary to try out various combinations...
        "b", "\u00d8", "A\u00D8", "ab\u00D8d", "abc\u00d8", "c3p0",
        "1234\u00C75",
        "......",
        "Long\u00FAer",
        "Latin1-fully-\u00BE-develop\u00A8d",
        "Some very long name, ridiculously long actually to see that buffer expansion works: \u00BF?"
    };

    final static String[] UTF8_3BYTE_STRINGS = new String[] {
        "\uC823?", "A\u400F", "1\u1234?", "ab\u1234d",
        "Ab123\u4034",
        "Long \uC023 ish",
        "Bit longer:\uC023",
        "Even-longer:\u3456",
        "Yet bit longer \uC023",
        "Even more \u3456 longer",
        "\uC023 Possibly ridiculous",
        "But \uC023 this takes the cake",
    };

    @Test
    void emptyName() throws Exception
    {
        _testEmptyName(MODE_INPUT_STREAM);
        _testEmptyName(MODE_INPUT_STREAM_THROTTLED);
        _testEmptyName(MODE_DATA_INPUT);
    }

    private void _testEmptyName(int mode) throws Exception
    {
        final String DOC = "{ \"\" : \"\" }";
        JsonParser p = createParser(mode, DOC);
        assertToken(JsonToken.START_OBJECT, p.nextToken());
        assertToken(JsonToken.FIELD_NAME, p.nextToken());
        assertEquals("", p.currentName());
        assertToken(JsonToken.VALUE_STRING, p.nextToken());
        assertEquals("", p.getText());
        assertToken(JsonToken.END_OBJECT, p.nextToken());
        p.close();
    }

    @Test
    void utf8Name2Bytes() throws Exception
    {
        _testUtf8Name2Bytes(MODE_INPUT_STREAM);
        _testUtf8Name2Bytes(MODE_INPUT_STREAM_THROTTLED);
        _testUtf8Name2Bytes(MODE_DATA_INPUT);
    }

    private void _testUtf8Name2Bytes(int mode) throws Exception
    {
        final String[] NAMES = UTF8_2BYTE_STRINGS;

        for (int i = 0; i < NAMES.length; ++i) {
            String NAME = NAMES[i];
            String DOC = "{ \""+NAME+"\" : 0 }";
            JsonParser p = createParser(mode, DOC);
            assertToken(JsonToken.START_OBJECT, p.nextToken());

            assertToken(JsonToken.FIELD_NAME, p.nextToken());

            assertTrue(p.hasToken(JsonToken.FIELD_NAME));
            assertTrue(p.hasTokenId(JsonTokenId.ID_FIELD_NAME));

            assertEquals(NAME, p.currentName());
            assertToken(JsonToken.VALUE_NUMBER_INT, p.nextToken());
            assertTrue(p.hasToken(JsonToken.VALUE_NUMBER_INT));
            assertTrue(p.hasTokenId(JsonTokenId.ID_NUMBER_INT));

            // should retain name during value entry, too
            assertEquals(NAME, p.currentName());

            assertToken(JsonToken.END_OBJECT, p.nextToken());
            p.close();
        }
    }

    @Test
    void utf8Name3Bytes() throws Exception
    {
        _testUtf8Name3Bytes(MODE_INPUT_STREAM);
        _testUtf8Name3Bytes(MODE_DATA_INPUT);
        _testUtf8Name3Bytes(MODE_INPUT_STREAM_THROTTLED);
    }

    public void _testUtf8Name3Bytes(int mode) throws Exception
    {
        final String[] NAMES = UTF8_3BYTE_STRINGS;

        for (int i = 0; i < NAMES.length; ++i) {
            String NAME = NAMES[i];
            String DOC = "{ \""+NAME+"\" : true }";

            JsonParser p = createParser(mode, DOC);
            assertToken(JsonToken.START_OBJECT, p.nextToken());

            assertToken(JsonToken.FIELD_NAME, p.nextToken());
            assertEquals(NAME, p.currentName());
            assertToken(JsonToken.VALUE_TRUE, p.nextToken());
            assertEquals(NAME, p.currentName());

            assertToken(JsonToken.END_OBJECT, p.nextToken());

            p.close();
        }
    }

    // How about tests for Surrogate-Pairs?

    @Test
    void utf8StringTrivial() throws Exception
    {
        _testUtf8StringTrivial(MODE_INPUT_STREAM);
        _testUtf8StringTrivial(MODE_DATA_INPUT);
        _testUtf8StringTrivial(MODE_INPUT_STREAM_THROTTLED);
    }

    public void _testUtf8StringTrivial(int mode) throws Exception
    {
        String[] VALUES = UTF8_2BYTE_STRINGS;
        for (int i = 0; i < VALUES.length; ++i) {
            String VALUE = VALUES[i];
            String DOC = "[ \""+VALUE+"\" ]";
            JsonParser p = createParser(mode, DOC);
            assertToken(JsonToken.START_ARRAY, p.nextToken());
            assertToken(JsonToken.VALUE_STRING, p.nextToken());
            String act = getAndVerifyText(p);
            if (act.length() != VALUE.length()) {
                fail("Failed for value #"+(i+1)+"/"+VALUES.length+": length was "+act.length()+", should be "+VALUE.length());
            }
            assertEquals(VALUE, act);
            assertToken(JsonToken.END_ARRAY, p.nextToken());
            p.close();
        }

        VALUES = UTF8_3BYTE_STRINGS;
        for (int i = 0; i < VALUES.length; ++i) {
            String VALUE = VALUES[i];
            String DOC = "[ \""+VALUE+"\" ]";
            JsonParser p = createParser(mode, DOC);
            assertToken(JsonToken.START_ARRAY, p.nextToken());
            assertToken(JsonToken.VALUE_STRING, p.nextToken());
            assertEquals(VALUE, getAndVerifyText(p));
            assertToken(JsonToken.END_ARRAY, p.nextToken());
            p.close();
        }
    }

    @Test
    void utf8StringValue() throws Exception
    {
        _testUtf8StringValue(MODE_INPUT_STREAM, 2900);
        _testUtf8StringValue(MODE_DATA_INPUT, 2900);
        _testUtf8StringValue(MODE_INPUT_STREAM_THROTTLED, 2900);

        _testUtf8StringValue(MODE_INPUT_STREAM, 5300);
        _testUtf8StringValue(MODE_DATA_INPUT, 5300);
        _testUtf8StringValue(MODE_INPUT_STREAM_THROTTLED, 5300);
    }

    public void _testUtf8StringValue(int mode, int len) throws Exception
    {
        Random r = new Random(13);
        StringBuilder sb = new StringBuilder(len + 20);
        while (sb.length() < len) {
            int c;
            if (r.nextBoolean()) { // ascii
                c = 32 + (r.nextInt() & 0x3F);
                if (c == '"' || c == '\\') {
                    c = ' ';
                }
            } else if (r.nextBoolean()) { // 2-byte
                c = 160 + (r.nextInt() & 0x3FF);
            } else if (r.nextBoolean()) { // 3-byte (non-surrogate)
                c = 8000 + (r.nextInt() & 0x7FFF);
            } else { // surrogates (2 chars)
                int value = r.nextInt() & 0x3FFFF; // 20-bit, ~ 1 million
                sb.append((char) (0xD800 + (value >> 10)));
                c = (0xDC00 + (value & 0x3FF));

            }
            sb.append((char) c);
        }

        final String VALUE = sb.toString();
        ByteArrayOutputStream bout = new ByteArrayOutputStream(len + (len >> 2));
        OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
        out.write("[\"");
        out.write(VALUE);
        out.write("\"]");
        out.close();

        JsonParser p = createParser(mode, bout.toByteArray());
        assertToken(JsonToken.START_ARRAY, p.nextToken());
        assertToken(JsonToken.VALUE_STRING, p.nextToken());
        String act = p.getText();

        assertEquals(VALUE.length(), act.length());
        assertEquals(VALUE, act);
        p.close();

        // But how about as key
        bout = new ByteArrayOutputStream(len + (len >> 2));
        out = new OutputStreamWriter(bout, "UTF-8");
        out.write("{\"");
        out.write(VALUE);
        out.write("\":42}");
        out.close();

        p = createParser(mode, bout.toByteArray());
        assertToken(JsonToken.START_OBJECT, p.nextToken());
        assertToken(JsonToken.FIELD_NAME, p.nextToken());

        act = p.getText();
        assertEquals(VALUE.length(), act.length());
        assertEquals(VALUE, act);

        assertToken(JsonToken.VALUE_NUMBER_INT, p.nextToken());
        assertEquals(42, p.getIntValue());
        assertToken(JsonToken.END_OBJECT, p.nextToken());

        p.close();
    }

    @Test
    void nextFieldName() throws IOException
    {
		ByteArrayOutputStream os = new ByteArrayOutputStream();
		os.write('{');
		for (int i = 0; i < 3994; i++) {
			os.write(' ');
		}
		os.write("\"id\":2".getBytes("UTF-8"));
		os.write('}');
		byte[] data = os.toByteArray();

		_testNextFieldName(MODE_INPUT_STREAM, data);
          _testNextFieldName(MODE_DATA_INPUT, data);
          _testNextFieldName(MODE_INPUT_STREAM_THROTTLED, data);
    }

    private void _testNextFieldName(int mode, byte[] doc) throws IOException
    {
        SerializedString id = new SerializedString("id");
        JsonParser parser = createParser(mode, doc);
        assertEquals(JsonToken.START_OBJECT, parser.nextToken());
        assertTrue(parser.nextFieldName(id));
        assertEquals(JsonToken.VALUE_NUMBER_INT, parser.nextToken());
        assertEquals(JsonToken.END_OBJECT, parser.nextToken());
        parser.close();
    }
}