PdfStringTest.java

/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2025 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.kernel.pdf;

import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.io.font.constants.StandardFonts;
import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
import com.itextpdf.kernel.pdf.tagging.StandardRoles;
import com.itextpdf.kernel.pdf.tagutils.TagTreePointer;
import com.itextpdf.kernel.utils.CompareTool;
import com.itextpdf.test.ExtendedITextTest;
import com.itextpdf.test.TestUtil;
import com.itextpdf.test.annotations.LogMessage;
import com.itextpdf.test.annotations.LogMessages;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;


@Tag("IntegrationTest")
public class PdfStringTest extends ExtendedITextTest {

    /** Directory holding the input PDFs, the image and the {@code cmp_} reference files read by these tests. */
    public static final String sourceFolder = "./src/test/resources/com/itextpdf/kernel/pdf/PdfStringTest/";
    /** Directory below the test output path where these tests write their result PDFs and diff files. */
    public static final String destinationFolder = TestUtil.getOutputPath() + "/kernel/pdf/PdfStringTest/";

    /**
     * Runs once before all tests and prepares {@link #destinationFolder} through the
     * {@code createDestinationFolder} helper, which is not defined in this class
     * (presumably inherited from {@link ExtendedITextTest}).
     */
    @BeforeAll
    public static void beforeClass() {
        createDestinationFolder(destinationFolder);
    }

    /**
     * Runs once after all tests and hands {@link #destinationFolder} to {@code CompareTool.cleanup}.
     */
    @AfterAll
    public static void afterClass() {
        CompareTool.cleanup(destinationFolder);
    }
    
    /**
     * Writes non-ASCII and ASCII values into the document info of a new PDF, re-opens the written
     * file, checks that every value is read back unchanged, and finally compares the output with the
     * reference PDF.
     *
     * @throws IOException          if writing, reading or comparing the files fails
     * @throws InterruptedException if the content comparison is interrupted
     */
    @Test
    public void testPdfDocumentInfoStringEncoding01() throws IOException, InterruptedException {
        String fileName = "testPdfDocumentInfoStringEncoding01.pdf";

        PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(destinationFolder + fileName, new WriterProperties().setCompressionLevel(CompressionConstants.NO_COMPRESSION)));
        pdfDocument.addNewPage();

        // NOTE(review): the four literals below consist of replacement characters, i.e. the original
        // non-ASCII text looks mis-encoded. They are kept as-is because the written file is compared
        // with a reference PDF; restore the intended text (ideally as Unicode escapes) once confirmed.
        String author = "��������������";
        String title = "������������������";
        String subject = "��������";
        String keywords = "���������������� ����������";
        String creator = "English text";

        pdfDocument.getDocumentInfo().setAuthor(author);
        pdfDocument.getDocumentInfo().setTitle(title);
        pdfDocument.getDocumentInfo().setSubject(subject);
        pdfDocument.getDocumentInfo().setKeywords(keywords);
        pdfDocument.getDocumentInfo().setCreator(creator);

        pdfDocument.close();

        // Read everything first and close the document before asserting, so the reader is released
        // even when an assertion fails and before the file is opened again for the comparison.
        PdfDocument readDoc = new PdfDocument(CompareTool.createOutputReader(destinationFolder + fileName));
        String readAuthor = readDoc.getDocumentInfo().getAuthor();
        String readTitle = readDoc.getDocumentInfo().getTitle();
        String readSubject = readDoc.getDocumentInfo().getSubject();
        String readKeywords = readDoc.getDocumentInfo().getKeywords();
        String readCreator = readDoc.getDocumentInfo().getCreator();
        readDoc.close();

        Assertions.assertEquals(author, readAuthor);
        Assertions.assertEquals(title, readTitle);
        Assertions.assertEquals(subject, readSubject);
        Assertions.assertEquals(keywords, readKeywords);
        Assertions.assertEquals(creator, readCreator);

        Assertions.assertNull(new CompareTool().compareByContent(destinationFolder + fileName, sourceFolder + "cmp_" + fileName, destinationFolder, "diff_"));
    }

    /**
     * Checks that a {@link PdfString} created from Cyrillic text without an explicit encoding does
     * not return the same text from {@code toUnicodeString()}.
     */
    @Test
    public void testUnicodeString() {
        // Cyrillic text written as Unicode escapes, like the other non-ASCII literals in this class,
        // so the test no longer depends on the encoding of this source file. The previous literal
        // had been reduced to replacement characters.
        // NOTE(review): the original wording could not be recovered; confirm the intended text.
        String unicode = "\u041F\u0440\u0438\u0432\u0435\u0442!";
        PdfString string = new PdfString(unicode);
        Assertions.assertNotEquals(unicode, string.toUnicodeString());
    }

    /**
     * Extracts the text of page 1 of {@code utf-8-actual-text.pdf} with actual-text usage enabled on
     * the extraction strategy and compares it with the expected Devanagari string.
     *
     * @throws java.io.IOException if the source PDF cannot be read
     */
    @Test
    public void readUtf8ActualText() throws java.io.IOException {
        // Two Devanagari words separated by a space; written as Unicode escapes to keep the source ASCII-only.
        String expected = "\u0936\u093e\u0902\u0924\u093f \u0926\u0947\u0935\u0928\u093E\u0917\u0930\u0940";

        PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "utf-8-actual-text.pdf"));
        PdfPage firstPage = document.getPage(1);
        String extracted = PdfTextExtractor.getTextFromPage(firstPage,
                new LocationTextExtractionStrategy().setUseActualText(true));
        document.close();

        Assertions.assertEquals(expected, extracted);
    }

    /**
     * Opens {@code utf-8-alt-text.pdf}, walks three levels down the tag tree (always taking the first
     * kid) and checks the alternate description found there.
     * The {@code EXISTING_TAG_STRUCTURE_ROOT_IS_NOT_STANDARD} log message is expected.
     *
     * @throws java.io.IOException if the source PDF cannot be read or the output cannot be written
     */
    @Test
    @LogMessages(messages = {
            @LogMessage(messageTemplate = IoLogMessageConstant.EXISTING_TAG_STRUCTURE_ROOT_IS_NOT_STANDARD)
    })
    public void readUtf8AltText() throws java.io.IOException {
        PdfReader reader = new PdfReader(sourceFolder + "utf-8-alt-text.pdf");
        PdfDocument document = new PdfDocument(reader, CompareTool.createTestPdfWriter(destinationFolder + "whatever"));

        // Descend root -> first kid -> first kid -> first kid.
        TagTreePointer cursor = new TagTreePointer(document);
        for (int depth = 0; depth < 3; depth++) {
            cursor = cursor.moveToKid(0);
        }
        String actualAltText = cursor.getProperties().getAlternateDescription();
        document.close();

        // English title followed by Cyrillic text in parentheses; the Cyrillic part is written as Unicode escapes.
        String expectedAltText = "2001: A Space Odyssey (\u041A\u043E\u0441\u043C\u0438\u0447\u0435\u0441\u043A\u0430\u044F " +
                "\u043E\u0434\u0438\u0441\u0441\u0435\u044F)";
        Assertions.assertEquals(expectedAltText, actualAltText);
    }

    /**
     * Reads the outline tree of {@code utf-8-bookmarks.pdf}, flattens two levels of it (each top-level
     * entry followed by its children) and checks the first six titles.
     *
     * @throws java.io.IOException if the source PDF cannot be read
     */
    @Test
    public void readUtf8Bookmarks() throws java.io.IOException {
        PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "utf-8-bookmarks.pdf"));
        PdfOutline rootOutline = document.getOutlines(true);

        List<String> actualTitles = new ArrayList<>(6);
        for (PdfOutline topLevel : rootOutline.getAllChildren()) {
            actualTitles.add(topLevel.getTitle());
            for (PdfOutline nested : topLevel.getAllChildren()) {
                actualTitles.add(nested.getTitle());
            }
        }
        document.close();

        String[] expectedTitles = {
                // First group: titles prefixed with two CJK characters (U+798F U+6615).
                "\u798F\u6615 bookmark 1",
                "\u798F\u6615  bookmark 1-1",
                "\u798F\u6615  bookmark 1-2",
                // Second group: titles prefixed with two CJK characters (U+4E2D U+56FD).
                "\u4E2D\u56FD bookmark 2",
                "\u4E2D\u56FD  bookmark 2-1",
                "\u4E2D\u56FD  bookmark 2-2"
        };
        for (int index = 0; index < 6; index++) {
            Assertions.assertEquals(expectedTitles[index], actualTitles.get(index));
        }
    }

    /**
     * Reads the page labels of {@code utf-8-page-label-prefix.pdf} and compares each returned label
     * with the expected value at the same position.
     *
     * @throws java.io.IOException if the source PDF cannot be read
     */
    @Test
    public void readUtf8PageLabelPrefix() throws java.io.IOException {
        PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "utf-8-page-label-prefix.pdf"));
        String[] actualLabels = document.getPageLabels();
        String[] expectedLabels = {
                "A", "B", "1", "2", "3", "4", "Movies-5", "Movies-6", "Movies-7", "Movies-8",
                "Movies-9", "Movies-10", "Movies-11", "Movies-12"
        };
        document.close();

        // NOTE(review): the loop is bounded by the number of labels actually returned, so a shorter
        // label array would still pass; consider also asserting the length once the page count of the
        // source PDF is confirmed.
        int position = 0;
        while (position < actualLabels.length) {
            Assertions.assertEquals(expectedLabels[position], actualLabels[position]);
            position++;
        }
    }

    /**
     * Creates a tagged one-page PDF with an image placed inside a SPAN-role tag whose alternate
     * description is a UTF-8 encoded {@link PdfString}, then compares the result with the reference PDF.
     *
     * @throws java.io.IOException  if the image or the PDFs cannot be read or written
     * @throws InterruptedException if the content comparison is interrupted
     */
    @Test
    public void writeUtf8AltText() throws java.io.IOException, InterruptedException {
        String imagePath = sourceFolder + "Space Odyssey.jpg";
        String outputPath = destinationFolder + "writeUtf8AltText.pdf";

        PdfDocument document = new PdfDocument(CompareTool.createTestPdfWriter(outputPath));
        document.setTagged();

        PdfPage firstPage = document.addNewPage();
        PdfCanvas pageCanvas = new PdfCanvas(firstPage);

        TagTreePointer pointer = new TagTreePointer(document);
        pointer.setPageForTagging(firstPage);
        pointer.addTag(StandardRoles.DIV);
        pointer.addTag(StandardRoles.SPAN);

        // English title followed by Cyrillic text in parentheses; the Cyrillic part is written as Unicode escapes.
        PdfString altText = new PdfString("2001: A Space Odyssey (\u041A\u043E\u0441\u043C\u0438\u0447\u0435\u0441\u043A\u0430\u044F " +
                "\u043E\u0434\u0438\u0441\u0441\u0435\u044F)", PdfEncodings.UTF8);
        pointer.getContext().getPointerStructElem(pointer).setAlt(altText);

        ImageData image = ImageDataFactory.create(imagePath);
        pageCanvas.openTag(pointer.getTagReference());
        pageCanvas.addImageFittedIntoRectangle(image, new Rectangle(36, 700, 65, 100), false);
        pageCanvas.closeTag();
        // NOTE(review): endText() is called although this method never calls beginText(). It is kept
        // because the output is compared with a reference PDF that was presumably produced with it.
        pageCanvas.endText();
        document.close();

        Assertions.assertNull(new CompareTool().compareByContent(outputPath, sourceFolder + "cmp_writeUtf8AltText.pdf", destinationFolder, "diffAltText_"));
    }

    /**
     * Creates a one-page PDF with two top-level outlines, each with two children, whose titles are
     * stored as UTF-8 encoded {@link PdfString}s, then compares the result with the reference PDF.
     *
     * @throws java.io.IOException  if a font or a PDF cannot be read or written
     * @throws InterruptedException if the content comparison is interrupted
     */
    @Test
    public void writeUtf8Bookmarks() throws java.io.IOException, InterruptedException {
        String outputPath = destinationFolder + "writeUtf8Bookmarks.pdf";
        PdfDocument document = new PdfDocument(CompareTool.createTestPdfWriter(outputPath));

        PdfCanvas pageCanvas = new PdfCanvas(document.addNewPage());
        pageCanvas.setFillColor(ColorConstants.MAGENTA);
        pageCanvas.beginText();
        pageCanvas.setFontAndSize(PdfFontFactory.createFont(StandardFonts.TIMES_ROMAN), 30);
        pageCanvas.setTextMatrix(25, 500);
        pageCanvas.showText("This file has bookmarks encoded with utf-8");
        pageCanvas.endText();

        PdfOutline rootOutline = document.getOutlines(false);

        // Every outline is created with an empty title, which is then replaced by a UTF-8 PdfString.
        // First group: titles prefixed with two CJK characters (U+798F U+6615).
        PdfOutline firstGroup = rootOutline.addOutline("");
        putUtf8Title(firstGroup, "\u798F\u6615 bookmark 1");
        putUtf8Title(firstGroup.addOutline(""), "\u798F\u6615  bookmark 1-1");
        putUtf8Title(firstGroup.addOutline(""), "\u798F\u6615  bookmark 1-2");

        // Second group: titles prefixed with two CJK characters (U+4E2D U+56FD).
        PdfOutline secondGroup = rootOutline.addOutline("");
        putUtf8Title(secondGroup, "\u4E2D\u56FD bookmark 2");
        putUtf8Title(secondGroup.addOutline(""), "\u4E2D\u56FD  bookmark 2-1");
        putUtf8Title(secondGroup.addOutline(""), "\u4E2D\u56FD  bookmark 2-2");

        document.close();
        Assertions.assertNull(new CompareTool().compareByContent(outputPath, sourceFolder + "cmp_writeUtf8Bookmarks.pdf", destinationFolder, "diffBookmarks_"));
    }

    /** Stores {@code title} as a UTF-8 encoded {@link PdfString} under the Title key of the outline's dictionary. */
    private static void putUtf8Title(PdfOutline outline, String title) {
        outline.getContent().put(PdfName.Title, new PdfString(title, PdfEncodings.UTF8));
    }

    /**
     * Creates a one-page PDF whose page label entry carries a UTF-8 encoded prefix string, draws a
     * line of text mentioning that prefix, and compares the result with the reference PDF.
     *
     * @throws java.io.IOException  if a font or a PDF cannot be read or written
     * @throws InterruptedException if the content comparison is interrupted
     */
    @Test
    public void writeUtf8PageLabelPrefix() throws java.io.IOException, InterruptedException {
        String outputPath = destinationFolder + "writeUtf8PageLabelPrefix.pdf";
        String prefix = "PREFIX-";

        PdfDocument document = new PdfDocument(CompareTool.createTestPdfWriter(outputPath));
        PdfPage firstPage = document.addNewPage();

        // Page label dictionary: style D, the UTF-8 encoded prefix, numbering starting at 1.
        PdfDictionary labelDictionary = new PdfDictionary();
        labelDictionary.put(PdfName.S, PdfName.D);
        labelDictionary.put(PdfName.P, new PdfString(prefix, PdfEncodings.UTF8));
        labelDictionary.put(PdfName.St, new PdfNumber(1));
        // The entry is registered under the page number minus one.
        int labelIndex = document.getPageNumber(firstPage) - 1;
        document.getCatalog().getPageLabelsTree(true).addEntry(labelIndex, labelDictionary);

        PdfCanvas pageCanvas = new PdfCanvas(firstPage);
        pageCanvas.setFillColor(ColorConstants.MAGENTA);
        pageCanvas.beginText();
        pageCanvas.setFontAndSize(PdfFontFactory.createFont(StandardFonts.TIMES_ROMAN), 30);
        pageCanvas.setTextMatrix(25, 500);
        pageCanvas.showText("This page has pageLabel prefix " + prefix);
        pageCanvas.endText();

        document.close();
        Assertions.assertNull(new CompareTool().compareByContent(outputPath, sourceFolder + "cmp_writeUtf8PageLabelPrefix.pdf", destinationFolder, "diffPageLabelPrefix_"));
    }

    /**
     * Creates a tagged one-page PDF with a line of text inside a SPAN-role tag whose actual text is a
     * UTF-8 encoded {@link PdfString}, then compares the result with the reference PDF.
     *
     * @throws java.io.IOException  if a font or a PDF cannot be read or written
     * @throws InterruptedException if the content comparison is interrupted
     */
    @Test
    public void writeUtf8ActualText() throws java.io.IOException, InterruptedException {
        String outputPath = destinationFolder + "writeUtf8ActualText.pdf";

        PdfDocument document = new PdfDocument(CompareTool.createTestPdfWriter(outputPath));
        document.setTagged();
        PdfPage firstPage = document.addNewPage();
        PdfCanvas pageCanvas = new PdfCanvas(firstPage);

        TagTreePointer pointer = new TagTreePointer(document);
        pointer.setPageForTagging(firstPage);
        pointer.addTag(StandardRoles.DIV);
        pointer.addTag(StandardRoles.SPAN);
        PdfString actualText = new PdfString("actual", PdfEncodings.UTF8);
        pointer.getContext().getPointerStructElem(pointer).setActualText(actualText);

        pageCanvas.beginText();
        pageCanvas.moveText(36, 788);
        pageCanvas.setFontAndSize(PdfFontFactory.createFont(StandardFonts.TIMES_ROMAN), 12);
        pageCanvas.openTag(pointer.getTagReference());
        // The wording of the shown text is kept exactly as-is: it ends up in the page content that is
        // compared with the reference PDF.
        pageCanvas.showText("These piece of text has an actual text property. Can be viewed via properties of span in the tag tree.");
        pageCanvas.closeTag();
        pageCanvas.endText();
        document.close();

        Assertions.assertNull(new CompareTool().compareByContent(outputPath, sourceFolder + "cmp_writeUtf8ActualText.pdf", destinationFolder, "diffActualText_"));
    }

    /**
     * Checks that an empty {@link PdfString} yields an empty Unicode string both before and after hex
     * writing is switched on.
     */
    @Test
    public void emptyHexWriting() {
        PdfString empty = new PdfString("");

        String beforeHex = empty.toUnicodeString();
        Assertions.assertEquals("", beforeHex);

        empty.setHexWriting(true);
        String afterHex = empty.toUnicodeString();
        Assertions.assertEquals("", afterHex);
    }


    /**
     * Checks that {@code encodeBytes(null)} throws a {@link PdfException} with the expected message.
     */
    @Test
    public void nullHexWriting() {
        PdfString sample = new PdfString("hello");
        Exception thrown = Assertions.assertThrows(PdfException.class, () -> sample.encodeBytes(null));
        Assertions.assertEquals("byte[] should not be null.", thrown.getMessage());
    }
}