TestCOSString.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.cos;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.pdfbox.pdfwriter.COSWriter;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link COSString}: construction from text and from hex, the literal and hex
 * forms produced when the string is written with {@link COSWriter}, and the
 * {@code equals()}/{@code hashCode()} behaviour.
 *
 * @author Ben Litchfield
 */
class TestCOSString extends TestCOSBase
{
    /** Sample text containing characters that have to be escaped in a PDF literal string. */
    private static final String ESC_CHAR_STRING =
            "( test#some) escaped< \\chars>!~1239857 ";
    /** {@link #ESC_CHAR_STRING} as it is expected to appear between the parentheses of a literal string. */
    private static final String ESC_CHAR_STRING_PDF_FORMAT =
            "\\( test#some\\) escaped< \\\\chars>!~1239857 ";

    @BeforeAll
    static void setUp()
    {
        testCOSBase = new COSString("test cos string");
    }

    /**
     * Tests the two output forms of a string: a COSString created with the second constructor
     * argument set to {@code true} is expected to be written in hex form ({@code <...>}), one
     * created from the text alone in literal form ({@code (...)}) with special characters escaped.
     */
    @Test
    void testSetForceHexLiteralForm()
    {
        String inputString = "Test with a text and a few numbers 1, 2 and 3";
        String pdfHex = "<" + createHex(inputString) + ">";
        COSString cosStr = new COSString(inputString, true);
        writePDFTests(pdfHex, cosStr);

        COSString escStr = new COSString(ESC_CHAR_STRING);
        writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", escStr);
        COSString escStrHex = new COSString(ESC_CHAR_STRING, true);
        // Escape characters not escaped in hex version
        writePDFTests("<" + createHex(ESC_CHAR_STRING) + ">", escStrHex);
    }

    /**
     * Helper method: writes the test subject with {@link COSWriter#writeString} and compares the
     * written bytes with the expected text.
     *
     * @param expected the String expected to be written
     * @param testSubj the test subject
     */
    private void writePDFTests(String expected, COSString testSubj)
    {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        try
        {
            COSWriter.writeString(testSubj, outStream);
        }
        catch (IOException e)
        {
            // pass the exception along so that its stack trace shows up in the test report
            fail("IOException: " + e.getMessage(), e);
        }
        assertEquals(expected, toLatin1String(outStream));
    }

    /**
     * Decodes the content of the stream byte by byte, independently of the platform default
     * charset.
     *
     * @param stream the stream holding the written bytes
     * @return the content of the stream decoded as ISO-8859-1
     */
    private static String toLatin1String(ByteArrayOutputStream stream)
    {
        return new String(stream.toByteArray(), StandardCharsets.ISO_8859_1);
    }

    /**
     * Test parseHex() - tests that the proper String is created from a hex string input and that
     * input containing non-hex characters is rejected with an IOException.
     */
    @Test
    void testFromHex()
    {
        String expected = "Quick and simple test";
        String hexForm = createHex(expected);
        try
        {
            COSString test1 = COSString.parseHex(hexForm);
            writePDFTests("(" + expected + ")", test1);
            COSString test2 = COSString.parseHex(createHex(ESC_CHAR_STRING));
            writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", test2);
        }
        catch (IOException e)
        {
            fail("IOException thrown: " + e.getMessage(), e);
        }
        assertThrows(IOException.class, () -> COSString.parseHex(hexForm + "xx"),
                "Should have thrown an IOException here");
    }

    /**
     * Converts each char of the given text to its upper case hexadecimal code, zero-padded to at
     * least two digits so that chars below 0x10 do not produce an odd number of digits.
     *
     * @param str the text to convert
     * @return the concatenated hex codes of all chars
     */
    private static String createHex(String str)
    {
        StringBuilder sb = new StringBuilder(str.length() * 2);
        for (char c : str.toCharArray())
        {
            sb.append(String.format("%02X", (int) c));
        }
        return sb.toString();
    }

    /**
     * Tests toHexString() - ensure the hex String returned is properly formatted.
     */
    @Test
    void testGetHex()
    {
        String expected = "Test subject for testing getHex";
        COSString test1 = new COSString(expected);
        String hexForm = createHex(expected);
        assertEquals(hexForm, test1.toHexString());
        COSString escCS = new COSString(ESC_CHAR_STRING);
        // Characters that are escaped in the literal form are expected to be converted to hex
        // as they are, without any escaping.
        assertEquals(createHex(ESC_CHAR_STRING), escCS.toHexString());
    }

    /**
     * Test getString() - ensure the text is returned unchanged, whether the COSString was created
     * from text or parsed from hex.
     */
    @Test
    void testGetString()
    {
        try
        {
            String testStr = "Test subject for getString()";
            COSString test1 = new COSString(testStr);
            assertEquals(testStr, test1.getString());

            COSString hexStr = COSString.parseHex(createHex(testStr));
            assertEquals(testStr, hexStr.getString());

            COSString escapedString = new COSString(ESC_CHAR_STRING);
            assertEquals(ESC_CHAR_STRING, escapedString.getString());

            testStr = "Line1\nLine2\nLine3\n";
            COSString lineFeedString = new COSString(testStr);
            assertEquals(testStr, lineFeedString.getString());
        }
        catch (IOException e)
        {
            fail("IOException thrown: " + e.getMessage(), e);
        }
    }

    /**
     * Test getBytes() - again not much to test, just ensure the proper byte array is returned.
     */
    @Test
    void testGetBytes()
    {
        COSString str = new COSString(ESC_CHAR_STRING);
        // the sample text is pure ASCII; name the charset so the platform default does not matter
        testByteArrays(ESC_CHAR_STRING.getBytes(StandardCharsets.US_ASCII), str.getBytes());
    }

    /**
     * Tests that a string written with COSWriter.writeString() is in PDF format.
     */
    @Test
    void testWritePDF()
    {
        // This has been tested quite thoroughly above but do a couple tests anyway
        COSString testSubj = new COSString(ESC_CHAR_STRING);
        writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", testSubj);
        String textString = "This is just an arbitrary piece of text for testing";
        COSString testSubj2 = new COSString(textString);
        writePDFTests("(" + textString + ")", testSubj2);
    }

    /**
     * Tests getString(), getBytes() and the written form for three kinds of text: pure ASCII,
     * text with accented characters in the range 0..255, and text with characters beyond 8 bits.
     *
     * @throws IOException If there is an exception while writing a string.
     */
    @Test
    void testUnicode() throws IOException
    {
        String theString = "\u4e16";
        COSString string = new COSString(theString);
        assertEquals(theString, string.getString());

        String textAscii = "This is some regular text. It should all be expressible in ASCII";
        // French and Spanish text with accented characters, all of them in the range 0..255
        String text8Bit = "En fran\u00e7ais o\u00f9 les choses sont accentu\u00e9s. En espa\u00f1ol, as\u00ed";
        // Japanese text, every character is beyond the 8 bit range
        String textHighBits = "\u3092\u30af\u30ea\u30c3\u30af\u3057\u3066\u304f";

        // Testing the getString method
        COSString stringAscii = new COSString(textAscii);
        assertEquals(textAscii, stringAscii.getString());

        COSString string8Bit = new COSString(text8Bit);
        assertEquals(text8Bit, string8Bit.getString());

        COSString stringHighBits = new COSString(textHighBits);
        assertEquals(textHighBits, stringHighBits.getString());

        // Testing the getBytes method
        // The first two strings should be stored as ISO-8859-1 because they only contain chars in the range 0..255
        assertEquals(textAscii, new String(stringAscii.getBytes(), StandardCharsets.ISO_8859_1));
        // likewise for the 8bit characters.
        assertEquals(text8Bit, new String(string8Bit.getBytes(), StandardCharsets.ISO_8859_1));

        // The japanese text contains high bits so must be stored as big endian UTF-16.
        // The UTF-16 charset evaluates a leading byte order mark and removes it when decoding.
        assertEquals(textHighBits, new String(stringHighBits.getBytes(), StandardCharsets.UTF_16));

        // Test the written form to ensure that the Strings are correct when written into PDF.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        COSWriter.writeString(stringAscii, out);
        assertEquals("(" + textAscii + ")", out.toString("ASCII"));

        // the accented text is expected in hex form, two digits per character
        out.reset();
        COSWriter.writeString(string8Bit, out);
        assertEquals("<" + createHex(text8Bit) + ">", out.toString("ASCII"));

        // the japanese text is expected in hex form, four digits per character after the BOM
        out.reset();
        COSWriter.writeString(stringHighBits, out);
        StringBuilder hex = new StringBuilder("FEFF"); // Byte Order Mark
        for (char c : textHighBits.toCharArray())
        {
            hex.append(String.format("%04X", (int) c));
        }
        assertEquals("<" + hex + ">", out.toString("ASCII"));
    }

    /**
     * Tests accept() - a COSWriter visiting the string is expected to write it in literal form,
     * or in hex form if the string was created with the hex flag set.
     *
     * @throws IOException If there is an exception while writing a string.
     */
    @Override
    @Test
    void testAccept() throws IOException
    {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        ICOSVisitor visitor = new COSWriter(outStream);
        COSString testSubj = new COSString(ESC_CHAR_STRING);
        testSubj.accept(visitor);
        assertEquals("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", toLatin1String(outStream));
        outStream.reset();
        COSString testSubjHex = new COSString(ESC_CHAR_STRING, true);
        testSubjHex.accept(visitor);
        assertEquals("<" + createHex(ESC_CHAR_STRING) + ">", toLatin1String(outStream));
    }

    /**
     * Tests equals(Object) - ensure that the Object.equals() contract is obeyed.
     */
    @Test
    void testEquals()
    {
        // Check all these several times for consistency
        for (int i = 0; i < 10; i++)
        {
            // Reflexive
            COSString x1 = new COSString("Test");
            assertEquals(x1, x1);

            // Symmetry i.e. if x == y then y == x
            COSString y1 = new COSString("Test");
            assertEquals(x1, y1);
            assertEquals(y1, x1);
            COSString x2 = new COSString("Test", true);
            // also if x != y then y != x
            assertNotEquals(x1, x2);
            assertNotEquals(x2, x1);

            // Transitive if x == y && y == z then x == z
            COSString z1 = new COSString("Test");
            assertEquals(x1, y1);
            assertEquals(y1, z1);
            assertEquals(x1, z1);
            // Test the negative as well if x1 == y1 && y1 != x2 then x1 != x2
            assertEquals(x1, y1);
            assertNotEquals(y1, x2);
            assertNotEquals(x1, x2);
        }
    }

    /**
     * Test hashCode() - tests that the Object.hashCode() contract is obeyed.
     */
    @Test
    void testHashCode()
    {
        COSString str1 = new COSString("Test1");
        COSString str2 = new COSString("Test2");
        // Different hash codes for unequal strings are stricter than what the general
        // hashCode() contract demands, but they are expected for these values.
        assertNotEquals(str1.hashCode(), str2.hashCode());
        COSString str3 = new COSString("Test1");
        assertEquals(str1.hashCode(), str3.hashCode());
        COSString str3Hex = new COSString("Test1", true);
        assertNotEquals(str1.hashCode(), str3Hex.hashCode());
    }

    /**
     * Test testCompareFromHexString() - tests that Strings created from hex
     * compare correctly (PDFBOX-2401)
     * 
     * @throws java.io.IOException If one of the hex strings cannot be parsed.
     */
    @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity
    @Test
    void testCompareFromHexString() throws IOException
    {
        COSString test1 = COSString.parseHex("000000FF000000");
        COSString test2 = COSString.parseHex("000000FF00FFFF");
        assertEquals(test1, test1);
        assertEquals(test2, test2);
        assertNotEquals(test1.toHexString(), test2.toHexString());
        assertFalse(Arrays.equals(test1.getBytes(), test2.getBytes()));
        assertNotEquals(test1, test2);
        assertNotEquals(test2, test1);
        assertNotEquals(test1.getString(), test2.getString());
    }

    /**
     * PDFBOX-3881: Test that if String has only the BOM, that it be an empty string.
     * 
     * @throws IOException If one of the hex strings cannot be parsed.
     */
    @Test
    void testEmptyStringWithBOM() throws IOException
    {
        assertTrue(COSString.parseHex("FEFF").getString().isEmpty());
        assertTrue(COSString.parseHex("FFFE").getString().isEmpty());
    }
}