SimpleFontToUnicodeExtractionTest.java

/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2025 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.kernel.pdf.canvas.parser;

import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
import com.itextpdf.test.ExtendedITextTest;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Tag;

import java.io.IOException;

@Tag("IntegrationTest")
public class SimpleFontToUnicodeExtractionTest extends ExtendedITextTest {

    private static final String sourceFolder = "./src/test/resources/com/itextpdf/kernel/parser/SimpleFontToUnicodeExtractionTest/";

    @Test
    public void test01() throws IOException {
        PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "simpleFontToUnicode01.pdf"));
        String expected = "Information plays a central role in soci-\n" +
                "ety today, and it is becoming more and \n" +
                "more common for that information to \n" +
                "be offered in digital form alone. The re-\n" +
                "liable, user-friendly Portable Document \n" +
                "Format (PDF) has become the world���s \n" +
                "file type of choice for providing infor-\n" +
                "mation as a digital document. \n" +
                "Tags can be added to a PDF in order \n" +
                "to structure the content of a document. \n" +
                "These tags are a critical requirement if \n" +
                "any form of assistive technology (such \n" +
                "as screen readers, specialist mice, and \n" +
                "speech recognition and text-to-speech \n" +
                "software) is to gain access to this con-\n" +
                "tent. To date, PDF documents have rare-\n" +
                "ly been tagged, and not all software can \n" +
                "make use of PDF tags. In practical terms, \n" +
                "this particularly reduces information���s \n" +
                "accessibility for people with disabilities \n" +
                "who rely on assistive technology.";

        String actualText = PdfTextExtractor.getTextFromPage(pdfDocument.getPage(1),
                new LocationTextExtractionStrategy());

        Assertions.assertEquals(expected, actualText);
    }

    @Test
    public void test02() throws IOException {
        PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "simpleFontToUnicode02.pdf"));
        String expected = "ffaast";

        String actualText = PdfTextExtractor.getTextFromPage(pdfDocument.getPage(1),
                new LocationTextExtractionStrategy());

        Assertions.assertEquals(expected, actualText);
    }

}