TestCMapParser.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.fontbox.cmap;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.junit.jupiter.api.Test;

/**
 * Tests for the {@link CMapParser} implementation.
 *
 * <p>File based CMaps are read from the {@code src/test/resources/cmap} directory, predefined CMaps
 * are looked up by name via {@link CMapParser#parsePredefined(String)}.
 */
class TestCMapParser
{
    /** Directory containing the CMap files used by the file based tests. */
    private static final File CMAP_DIR = new File("src/test/resources/cmap");

    /**
     * Parses one of the CMap test files and closes the file again once parsing is done.
     *
     * @param parser the parser to use, e.g. a parser created in strict mode
     * @param fileName name of the CMap file within {@link #CMAP_DIR}
     * @return the CMap produced by the parser
     * @throws IOException If the file can't be opened or parsed
     */
    private static CMap parseCMapFile(CMapParser parser, String fileName) throws IOException
    {
        // The tests only work on the returned CMap, so the source can be closed right away
        // instead of leaving the reader open for the rest of the test run.
        try (RandomAccessReadBufferedFile source = new RandomAccessReadBufferedFile(
                new File(CMAP_DIR, fileName)))
        {
            return parser.parse(source);
        }
    }

    /**
     * Check whether the parser and the resulting mappings work correctly for the "CMapTest" file.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testLookup() throws IOException
    {
        CMap cMap = parseCMapFile(new CMapParser(), "CMapTest");

        // char mappings
        assertEquals("A", cMap.toUnicode(new byte[] { 0, 1 }),
                "bytes 00 01 from bfrange <0001> <0005> <0041>");
        assertEquals("0", cMap.toUnicode(new byte[] { 1, 0 }),
                "bytes 01 00 from bfrange <0100> <0109> <0030>");
        assertEquals("P", cMap.toUnicode(new byte[] { 1, 32 }),
                "bytes 01 20 are expected to map to \"P\"");
        assertEquals("R", cMap.toUnicode(new byte[] { 1, 33 }),
                "bytes 01 21 are expected to map to \"R\"");
        assertEquals("*", cMap.toUnicode(new byte[] { 0, 10 }),
                "bytes 00 0A from bfchar <000A> <002A>");
        assertEquals("+", cMap.toUnicode(new byte[] { 1, 10 }),
                "bytes 01 0A from bfchar <010A> <002B>");

        // CID mappings
        assertEquals(65, cMap.toCID(new byte[] { 0, 65 }),
                "CID 65 from cidrange <0000> <00ff> 0 ");
        assertEquals(0x0118, cMap.toCID(new byte[] { 1, 24 }),
                "CID 280 from cidrange <0100> <01ff> 256");
        assertEquals(0x0208, cMap.toCID(new byte[] { 2, 8 }),
                "CID 520 from cidchar <0208> 520");
        // NOTE(review): the message quotes <0300> as range while the code is 0x012C;
        // verify against the CMapTest resource
        assertEquals(0x12C, cMap.toCID(new byte[] { 1, 0x2c }),
                "CID 300 from cidrange <0300> <0300> 300");
    }

    /**
     * Check that the predefined CMap "Identity-H" returns the two byte code itself as CID.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testIdentity() throws IOException
    {
        CMap cMap = new CMapParser().parsePredefined("Identity-H");

        assertEquals(65, cMap.toCID(new byte[] { 0, 65 }), "Identity-H CID 65");
        assertEquals(12345, cMap.toCID(new byte[] { 0x30, 0x39 }), "Identity-H CID 12345");
        assertEquals(0xFFFF, cMap.toCID(new byte[] { (byte) 0xFF, (byte) 0xFF }),
                "Identity-H CID 0xFFFF");
    }

    /**
     * Check CID lookups of the predefined CMap "UniJIS-UTF16-H" using two and four byte codes.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testUniJIS_UTF16_H() throws IOException
    {
        CMap cMap = new CMapParser().parsePredefined("UniJIS-UTF16-H");

        // the next 3 cases demonstrate the issue of possible false result values of
        // CMap.toCID(int code): without a length the code 0xb1 yields 694, although only the
        // two byte code 0x00b1 is expected to do so
        assertEquals(694, cMap.toCID(0xb1), "UniJIS-UTF16-H CID 0xb1 -> 694");
        assertNotEquals(694, cMap.toCID(0xb1, 1),
                "UniJIS-UTF16-H one byte code 0xb1 must not be mapped to 694");
        assertEquals(694, cMap.toCID(0xb1, 2), "UniJIS-UTF16-H CID 0x00b1 -> 694");

        // 1:1 cid char mapping
        assertEquals(694, cMap.toCID(new byte[] { 0x00, (byte) 0xb1 }),
                "UniJIS-UTF16-H CID 0x00b1 -> 694");
        assertEquals(20168, cMap.toCID(new byte[] { (byte) 0xd8, 0x50, (byte) 0xdc, 0x4b }),
                "UniJIS-UTF16-H CID 0xd850dc4b -> 20168");

        // cid range mapping
        assertEquals(19223, cMap.toCID(new byte[] { 0x54, 0x34 }),
                "UniJIS-UTF16-H CID 0x5434 -> 19223");
        assertEquals(10006, cMap.toCID(new byte[] { (byte) 0xd8, 0x3c, (byte) 0xdd, 0x12 }),
                "UniJIS-UTF16-H CID 0xd83cdd12 -> 10006");
    }

    /**
     * Check a CID lookup of the predefined CMap "UniJIS-UCS2-H".
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testUniJIS_UCS2_H() throws IOException
    {
        CMap cMap = new CMapParser().parsePredefined("UniJIS-UCS2-H");

        assertEquals(34, cMap.toCID(new byte[] { 0, 65 }), "UniJIS-UCS2-H CID 65 -> 34");
    }

    /**
     * Check a unicode lookup of the predefined CMap "Adobe-GB1-UCS2".
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testAdobe_GB1_UCS2() throws IOException
    {
        CMap cMap = new CMapParser().parsePredefined("Adobe-GB1-UCS2");

        assertEquals("0", cMap.toUnicode(new byte[] { 0, 0x11 }),
                "Adobe-GB1-UCS2 CID 0x11 -> \"0\"");
    }

    /**
     * Test the parser against a valid, but poorly formatted CMap file.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testParserWithPoorWhitespace() throws IOException
    {
        CMap cMap = parseCMapFile(new CMapParser(), "CMapNoWhitespace");

        assertNotNull(cMap, "Failed to parse nasty CMap file");
    }

    /**
     * Test the parser against the malformed file "CMapMalformedbfrange1": the first range has to be
     * available while no mapping is expected for the bytes 01 00.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testParserWithMalformedbfrange1() throws IOException
    {
        CMap cMap = parseCMapFile(new CMapParser(), "CMapMalformedbfrange1");

        assertNotNull(cMap, "Failed to parse malformed CMap file");

        assertEquals("A", cMap.toUnicode(new byte[] { 0, 1 }),
                "bytes 00 01 from bfrange <0001> <0009> <0041>");
        assertNull(cMap.toUnicode(new byte[] { 1, 0 }), "bytes 01 00 must not be mapped");
    }

    /**
     * Test the parser against the malformed file "CMapMalformedbfrange2" and compare the border
     * values 02 F0 and 02 F1 in non strict and in strict mode.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testParserWithMalformedbfrange2() throws IOException
    {
        CMap cMap = parseCMapFile(new CMapParser(), "CMapMalformedbfrange2");

        assertNotNull(cMap, "Failed to parse malformed CMap file");

        assertEquals("0", cMap.toUnicode(new byte[] { 0, 1 }),
                "bytes 00 01 from bfrange <0001> <0009> <0030>");

        assertEquals("A", cMap.toUnicode(new byte[] { 2, 0x32 }),
                "bytes 02 32 from bfrange <0232> <0432> <0041>");

        // check border values for non strict mode
        assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }),
                "bytes 02 F0 have to be mapped in non strict mode");
        assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }),
                "bytes 02 F1 have to be mapped in non strict mode");

        // use strict mode
        cMap = parseCMapFile(new CMapParser(true), "CMapMalformedbfrange2");

        // check border values for strict mode
        assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }),
                "bytes 02 F0 have to be mapped in strict mode");
        assertNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }),
                "bytes 02 F1 must not be mapped in strict mode");
    }

    /**
     * Check name, writing mode and kind of mappings of the predefined CMap "Adobe-Korea1-UCS2" and
     * that the predefined CMap "Identity-V" can be parsed.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testPredefinedMap() throws IOException
    {
        CMap cMap = new CMapParser().parsePredefined("Adobe-Korea1-UCS2");
        assertNotNull(cMap, "Failed to parse predefined CMap Adobe-Korea1-UCS2");

        assertEquals("Adobe-Korea1-UCS2", cMap.getName(), "wrong CMap name");
        assertEquals(0, cMap.getWMode(), "wrong WMode");
        assertFalse(cMap.hasCIDMappings(), "Adobe-Korea1-UCS2 must not have CID mappings");
        assertTrue(cMap.hasUnicodeMappings(), "Adobe-Korea1-UCS2 has to have unicode mappings");

        cMap = new CMapParser().parsePredefined("Identity-V");
        assertNotNull(cMap, "Failed to parse predefined CMap Identity-V");
    }

    /**
     * Check in strict mode that the file "Identitybfrange" maps every tested two byte code to the
     * character with the same UTF-16BE encoding.
     *
     * @throws IOException If something went wrong
     */
    @Test
    void testIdentitybfrange() throws IOException
    {
        // use strict mode
        CMap cMap = parseCMapFile(new CMapParser(true), "Identitybfrange");
        assertEquals("Adobe-Identity-UCS", cMap.getName(), "wrong CMap name");

        byte[] bytes = { 0, 65 };
        assertEquals(new String(bytes, StandardCharsets.UTF_16BE), cMap.toUnicode(bytes),
                "Identity 0x0041");
        bytes = new byte[] { 0x30, 0x39 };
        assertEquals(new String(bytes, StandardCharsets.UTF_16BE), cMap.toUnicode(bytes),
                "Identity 0x3039");
        // check border values for strict mode: last code before the first byte changes
        bytes = new byte[] { 0x30, (byte) 0xFF };
        assertEquals(new String(bytes, StandardCharsets.UTF_16BE), cMap.toUnicode(bytes),
                "Identity 0x30FF");
        // check border values for strict mode: first code after the first byte changed
        bytes = new byte[] { 0x31, 0x00 };
        assertEquals(new String(bytes, StandardCharsets.UTF_16BE), cMap.toUnicode(bytes),
                "Identity 0x3100");
        bytes = new byte[] { (byte) 0xFF, (byte) 0xFF };
        assertEquals(new String(bytes, StandardCharsets.UTF_16BE), cMap.toUnicode(bytes),
                "Identity 0xFFFF");
    }
}