// TestTextToPdf.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.tools;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.text.PDFTextStripper;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;

/**
 * Test suite for TextToPDF.
 *
 * <p>Each test converts plain text into a PDF with {@link TextToPDF}. Except for the empty-input
 * test, the result is saved to memory, loaded again and its text extracted with
 * {@link PDFTextStripper} so that the page layout can be compared against the expected text.
 */
class TestTextToPdf
{
    /**
     * This test ensures that a PDF created from an empty String is still readable by Adobe Reader
     *
     * @throws IOException if the PDF cannot be created.
     */
    @Test
    void testCreateEmptyPdf() throws IOException
    {
        TextToPDF pdfCreator = new TextToPDF();
        // try-with-resources guarantees that the document is closed even when an assertion fails
        try (StringReader reader = new StringReader("");
             PDDocument pdfDoc = pdfCreator.createPDFFromText(reader))
        {
            // In order for the PDF document to be openable by Adobe Reader, it needs
            // to have some pages in it. So we'll check that.
            int pageCount = pdfDoc.getNumberOfPages();
            assertTrue(pageCount > 0, "All Pages was unexpectedly zero.");
            assertEquals(1, pageCount, "Wrong number of pages.");
        }
    }

    /**
     * Tests that the form feed is properly processed.
     * 
     * @throws IOException if the PDF cannot be created, saved or reloaded.
     */
    @Test
    void testFormFeed() throws IOException
    {
        TextToPDF pdfCreator = new TextToPDF();
        byte[] pdfBytes = createPdfBytes(pdfCreator, "First page\fSecond page\f\nThird page");
        try (PDDocument doc = Loader.loadPDF(pdfBytes))
        {
            assertEquals(3, doc.getNumberOfPages());
            PDFTextStripper stripper = new PDFTextStripper();
            assertEquals("First page", extractPageText(stripper, doc, 1));
            assertEquals("Second page", extractPageText(stripper, doc, 2));
            assertEquals("Third page", extractPageText(stripper, doc, 3));
        }
    }

    /**
     * Tests x overflow so that new line is used, and overflow on the y axis so new page must be
     * created.
     *
     * @throws IOException if the PDF cannot be created, saved or reloaded.
     */
    @Test
    void testOverflow() throws IOException
    {
        TextToPDF pdfCreator = new TextToPDF();
        // a small page size, so that the text below does not fit on a single page
        pdfCreator.setMediaBox(PDRectangle.A6);
        String text = "Lorem ipsum dolor sit amet, consetetur sadipscing "
                + "elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam "
                + "erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. "
                + "Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. "
                + "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod "
                + "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. "
                + "At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd "
                + "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem "
                + "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod "
                + "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. "
                + "At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd "
                + "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\n"
                + "\n"
                + "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie "
                + "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan "
                + "et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue "
                + "duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, "
                + "consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut "
                + "laoreet dolore magna aliquam erat volutpat.\n"
                + "\n"
                + "Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper "
                + "suscipit lobortis nisl ut aliquip ex ea commodo consequat. "
                + "Duis autem vel eum iriure dolor in hendrerit in vulputate "
                + "velit esse molestie consequat, vel illum dolore eu feugiat nulla "
                + "facilisis at vero eros et accumsan et iusto odio dignissim qui blandit "
                + "praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.\n"
                + "\n"
                + "Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming "
                + "id quod mazim placerat facer.";
        String expectedPage1Text
                = "Lorem ipsum dolor sit amet, consetetur\n"
                + "sadipscing elitr, sed diam nonumy eirmod\n"
                + "tempor invidunt ut labore et dolore magna\n"
                + "aliquyam erat, sed diam voluptua. At vero eos et\n"
                + "accusam et justo duo dolores et ea rebum. Stet\n"
                + "clita kasd gubergren, no sea takimata sanctus\n"
                + "est Lorem ipsum dolor sit amet. Lorem ipsum\n"
                + "dolor sit amet, consetetur sadipscing elitr, sed\n"
                + "diam nonumy eirmod tempor invidunt ut labore et\n"
                + "dolore magna aliquyam erat, sed diam voluptua.\n"
                + "At vero eos et accusam et justo duo dolores et\n"
                + "ea rebum. Stet clita kasd gubergren, no sea\n"
                + "takimata sanctus est Lorem ipsum dolor sit amet.\n"
                + "Lorem ipsum dolor sit amet, consetetur\n"
                + "sadipscing elitr, sed diam nonumy eirmod\n"
                + "tempor invidunt ut labore et dolore magna\n"
                + "aliquyam erat, sed diam voluptua. At vero eos et\n"
                + "accusam et justo duo dolores et ea rebum. Stet\n"
                + "clita kasd gubergren, no sea takimata sanctus\n"
                + "est Lorem ipsum dolor sit amet.\n"
                + "\n"
                + "Duis autem vel eum iriure dolor in hendrerit in\n"
                + "vulputate velit esse molestie consequat, vel illum\n"
                + "dolore eu feugiat nulla facilisis at vero eros et\n"
                + "accumsan et iusto odio dignissim qui blandit\n"
                + "praesent luptatum zzril delenit augue duis dolore\n"
                + "te feugait nulla facilisi. Lorem ipsum dolor sit\n"
                + "amet, consectetuer adipiscing elit, sed diam";
        String expectedPage2Text
                = "nonummy nibh euismod tincidunt ut laoreet\n"
                + "dolore magna aliquam erat volutpat.\n"
                + "\n"
                + "Ut wisi enim ad minim veniam, quis nostrud\n"
                + "exerci tation ullamcorper suscipit lobortis nisl ut\n"
                + "aliquip ex ea commodo consequat. Duis autem\n"
                + "vel eum iriure dolor in hendrerit in vulputate velit\n"
                + "esse molestie consequat, vel illum dolore eu\n"
                + "feugiat nulla facilisis at vero eros et accumsan et\n"
                + "iusto odio dignissim qui blandit praesent\n"
                + "luptatum zzril delenit augue duis dolore te feugait\n"
                + "nulla facilisi.\n"
                + "\n"
                + "Nam liber tempor cum soluta nobis eleifend\n"
                + "option congue nihil imperdiet doming id quod\n"
                + "mazim placerat facer.";
        byte[] pdfBytes = createPdfBytes(pdfCreator, text);
        try (PDDocument doc = Loader.loadPDF(pdfBytes))
        {
            assertEquals(2, doc.getNumberOfPages());
            PDFTextStripper stripper = new PDFTextStripper();
            // use "\n" for line and paragraph breaks, matching the "\n" in the expected texts
            stripper.setLineSeparator("\n");
            stripper.setParagraphStart("\n");
            assertEquals(expectedPage1Text, extractPageText(stripper, doc, 1));
            assertEquals(expectedPage2Text, extractPageText(stripper, doc, 2));
        }
    }

    /**
     * Test that leading and trailing spaces and newlines are preserved.
     *
     * <p>The extracted text is trimmed before the comparison, so only whitespace inside the text
     * (the indentation of the second line, the space ending it and the empty line) is checked.
     * 
     * @throws IOException if the PDF cannot be created, saved or reloaded.
     */
    @Test
    void testLeadingTrailingSpaces() throws IOException
    {
        TextToPDF pdfCreator = new TextToPDF();
        String text = "Lorem ipsum dolor sit amet,\n"
                + "    consectetur adipiscing \n"
                + "\n"
                + "elit. sed do eiusmod";
        byte[] pdfBytes = createPdfBytes(pdfCreator, text);
        try (PDDocument doc = Loader.loadPDF(pdfBytes))
        {
            assertEquals(1, doc.getNumberOfPages());
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setLineSeparator("\n");
            stripper.setParagraphStart("\n");
            assertEquals(text, stripper.getText(doc).trim());
        }
    }

    /**
     * Converts the text to a PDF with the given creator and returns the saved PDF as bytes.
     *
     * @param pdfCreator the configured converter to use.
     * @param text the text to convert.
     * @return the bytes of the saved PDF document.
     * @throws IOException if the PDF cannot be created or saved.
     */
    private static byte[] createPdfBytes(TextToPDF pdfCreator, String text) throws IOException
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (StringReader reader = new StringReader(text);
             PDDocument doc = pdfCreator.createPDFFromText(reader))
        {
            doc.save(baos);
        }
        return baos.toByteArray();
    }

    /**
     * Extracts the trimmed text of a single page. The start and end page of the stripper are
     * both set to the given page number and remain so after the call.
     *
     * @param stripper the stripper to use.
     * @param doc the document to extract from.
     * @param pageNumber the 1-based number of the page.
     * @return the text of the page without leading and trailing whitespace.
     * @throws IOException if the text cannot be extracted.
     */
    private static String extractPageText(PDFTextStripper stripper, PDDocument doc, int pageNumber)
            throws IOException
    {
        stripper.setStartPage(pageNumber);
        stripper.setEndPage(pageNumber);
        return stripper.getText(doc).trim();
    }
}