TaggedPdfReaderToolTest.java

/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2025 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.kernel.utils;

import com.itextpdf.commons.utils.FileUtil;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.tagging.PdfStructElem;
import com.itextpdf.kernel.pdf.tagging.PdfStructTreeRoot;
import com.itextpdf.kernel.pdf.tagging.StandardRoles;
import com.itextpdf.test.ExtendedITextTest;
import com.itextpdf.test.TestUtil;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import javax.xml.parsers.ParserConfigurationException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Tag;
import org.xml.sax.SAXException;

@Tag("IntegrationTest")
public class TaggedPdfReaderToolTest extends ExtendedITextTest {

    private static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/kernel/utils/TaggedPdfReaderToolTest/";
    private static final String DESTINATION_FOLDER = TestUtil.getOutputPath() + "/kernel/utils/TaggedPdfReaderToolTest/";

    @BeforeAll
    public static void beforeClass() {
        createOrClearDestinationFolder(DESTINATION_FOLDER);
    }

    @Test
    public void taggedPdfReaderToolTest01() throws IOException, ParserConfigurationException, SAXException {
        String filename = "iphone_user_guide.pdf";

        String outXmlPath = DESTINATION_FOLDER + "outXml01.xml";
        String cmpXmlPath = SOURCE_FOLDER + "cmpXml01.xml";

        PdfReader reader = new PdfReader(SOURCE_FOLDER + filename);

        try (OutputStream outXml = FileUtil.getFileOutputStream(outXmlPath);
             PdfDocument document = new PdfDocument(reader)) {

            TaggedPdfReaderTool tool = new TaggedPdfReaderTool(document);
            tool.setRootTag("root");
            tool.convertToXml(outXml);
        }

        CompareTool compareTool = new CompareTool();
        if (!compareTool.compareXmls(outXmlPath, cmpXmlPath)) {
            Assertions.fail("Resultant xml is different.");
        }
    }

    @Test
    public void noStructTreeRootInDocTest() {
        String outXmlPath = DESTINATION_FOLDER + "noStructTreeRootInDoc.xml";

        try {
            PdfDocument pdfDocument = new PdfDocument(new PdfWriter(new ByteArrayOutputStream()));
            TaggedPdfReaderTool tool = new TaggedPdfReaderTool(pdfDocument);
            try (OutputStream outXml = FileUtil.getFileOutputStream(outXmlPath)) {
                Exception exception = Assertions.assertThrows(PdfException.class,
                        () -> tool.convertToXml(outXml, "UTF-8"));
                Assertions.assertEquals(KernelExceptionMessageConstant.DOCUMENT_DOES_NOT_CONTAIN_STRUCT_TREE_ROOT,
                        exception.getMessage());
            }
        } catch (IOException e) {
            Assertions.fail("IOException is not expected to be triggered");
        }
    }

    @Test
    public void cyclicReferencesTest() throws IOException, ParserConfigurationException, SAXException {
        String outXmlPath = DESTINATION_FOLDER + "cyclicReferences.xml";
        String cmpXmlPath = SOURCE_FOLDER + "cmp_cyclicReferences.xml";

        PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream()));
        doc.setTagged();
        PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.convertRoleToPdfName(StandardRoles.P));
        PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.convertRoleToPdfName(StandardRoles.DIV));
        doc.getStructTreeRoot().addKid(kid1);
        doc.getStructTreeRoot().addKid(kid2);
        kid1.addKid(kid2);
        kid2.addKid(kid1);

        TaggedPdfReaderTool tool = new TaggedPdfReaderTool(doc);
        try (OutputStream outXml = FileUtil.getFileOutputStream(outXmlPath)) {
            tool.convertToXml(outXml, "UTF-8");
        }

        CompareTool compareTool = new CompareTool();
        if (!compareTool.compareXmls(outXmlPath, cmpXmlPath)) {
            Assertions.fail("Resultant xml is different.");
        }
    }
}