TextToPDF.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.tools;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.Callable;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts.FontName;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
/**
* This will take a text file and output a pdf with that text.
*
* @author Ben Litchfield
*/
@Command(name = "texttopdf", header = "Creates a PDF document from text", versionProvider = Version.class, mixinStandardHelpOptions = true)
public class TextToPDF implements Callable<Integer>
{
/**
* The scaling factor for font units to PDF units. The font's bounding box height and
* string widths are divided by this value before being multiplied by the font size.
*/
private static final int FONTSCALE = 1000;
/**
* The default font size
*/
private static final float DEFAULT_FONT_SIZE = 10;
/**
* The line height as a factor of the font size
*/
private static final float DEFAULT_LINE_HEIGHT_FACTOR = 1.05f;
/**
* The default margin, used as the initial value for all four page sides
*/
private static final float DEFAULT_MARGIN = 40;
// Page size used by createPDFFromText; width and height are swapped there when landscape is set.
private PDRectangle mediaBox = PDRectangle.LETTER;
// Font used to draw the text; while it is null, createPDFFromText falls back to a
// PDType1Font built from standardFont.
private PDFont font = null;
// Expected for CLI app to write to System.out/System.err
// Assigned System.err in the constructor; call() prints its error message to it.
@SuppressWarnings("squid:S106")
private final PrintStream SYSERR;
// Font size; used for the line height, the line width measurement and the content stream font.
@Option(names = "-fontSize", description = "the size of the font to use (default: ${DEFAULT-VALUE})")
private float fontSize = DEFAULT_FONT_SIZE;
// Multiplier applied to font height * font size to obtain the distance between lines.
@Option(names = "-lineSpacing", description = "the factor of the font size for the line height (default: ${DEFAULT-VALUE})")
private float lineSpacing = DEFAULT_LINE_HEIGHT_FACTOR;
// When true, createPDFFromText swaps the width and height of mediaBox.
@Option(names = "-landscape", description = "set orientation to landscape")
private boolean landscape = false;
// Command line page size; call() copies its rectangle into mediaBox.
@Option(names = "-pageSize", description = "the page size to use. \nCandidates: ${COMPLETION-CANDIDATES} (default: ${DEFAULT-VALUE})")
private PageSizes pageSize = PageSizes.LETTER;
// Charset used by call() to decode the input file; a leading UTF-8 BOM is only
// looked for when this equals UTF-8.
@Option(names = "-charset", description = "the charset to use. \n(default: ${DEFAULT-VALUE})")
private Charset charset = Charset.defaultCharset();
// Command line margins; call() passes the entries, in order, to setLeftMargin,
// setRightMargin, setTopMargin and setBottomMargin.
@Option(names = "-margins", arity="0..4", description = "Left Right Top Bottom margins (default: ${DEFAULT-VALUE})")
private float[] margins = {DEFAULT_MARGIN, DEFAULT_MARGIN, DEFAULT_MARGIN, DEFAULT_MARGIN};
// Standard font used by call() when no TTF file is given, and by createPDFFromText
// as the fallback when no font has been set.
@Option(names = "-standardFont",
description = "the font to use for the text. Either this or -ttf should be specified but not both.\nCandidates: ${COMPLETION-CANDIDATES} (default: ${DEFAULT-VALUE})")
private FontName standardFont = FontName.HELVETICA;
// When non-null, call() loads the font from this file with PDType0Font.load.
@Option(names = "-ttf", paramLabel="<ttf file>", description = "the TTF font to use for the text. Either this or -standardFont should be specified but not both.")
private File ttf;
// Text file read by call().
@Option(names = {"-i", "--input"}, description = "the text file to convert", required = true)
private File infile;
// File the generated document is saved to by call().
@Option(names = {"-o", "--output"}, description = "the generated PDF file", required = true)
private File outfile;
// Margins actually used by createPDFFromText; changed through the setters below.
private float leftMargin = DEFAULT_MARGIN;
private float rightMargin = DEFAULT_MARGIN;
private float topMargin = DEFAULT_MARGIN;
private float bottomMargin = DEFAULT_MARGIN;
/**
 * Page sizes selectable through the -pageSize option, each bound to the
 * PDRectangle constant of the same name.
 */
private enum PageSizes
{
    LETTER(PDRectangle.LETTER),
    LEGAL(PDRectangle.LEGAL),
    A0(PDRectangle.A0),
    A1(PDRectangle.A1),
    A2(PDRectangle.A2),
    A3(PDRectangle.A3),
    A4(PDRectangle.A4),
    A5(PDRectangle.A5),
    A6(PDRectangle.A6);

    /** The rectangle describing this page size. */
    private final PDRectangle rectangle;

    PageSizes(PDRectangle rectangle)
    {
        this.rectangle = rectangle;
    }

    /**
     * @return the rectangle describing this page size.
     */
    public PDRectangle getPageSize()
    {
        return rectangle;
    }
}
/**
 * Creates a converter that reports errors on the standard error stream.
 */
public TextToPDF()
{
    this.SYSERR = System.err;
}
/**
 * Command line entry point: parses the arguments with picocli, runs the
 * conversion and terminates the JVM with the resulting exit code.
 *
 * @param args Command line arguments.
 */
public static void main(String[] args)
{
    // suppress the Dock icon on OS X
    System.setProperty("apple.awt.UIElement", "true");

    CommandLine commandLine = new CommandLine(new TextToPDF());
    System.exit(commandLine.execute(args));
}
/**
 * Converts the input file to a PDF using the values of the command line options.
 * <p>
 * The font is loaded from the TTF file if one was given, otherwise the selected standard
 * font is used. When the charset is UTF-8 and the file starts with a byte order mark,
 * the mark is skipped before decoding.
 * <p>
 * An IllegalArgumentException raised by {@link #setLineSpacing(float)} is not caught here.
 *
 * @return 0 on success, 4 if an IOException occurred (its message is printed to the error stream).
 */
@Override
public Integer call()
{
    try (PDDocument doc = new PDDocument())
    {
        if (ttf != null)
        {
            font = PDType0Font.load(doc, ttf);
        }
        else
        {
            font = new PDType1Font(standardFont);
        }

        setFont(font);
        setFontSize(fontSize);
        setMediaBox(pageSize.getPageSize());
        setLandscape(landscape);
        setLineSpacing(lineSpacing);

        // -margins is declared with arity "0..4", so fewer than four values may have been
        // supplied. Only the margins actually present are applied; the remaining sides
        // keep their current value instead of failing with an ArrayIndexOutOfBoundsException.
        int marginCount = margins == null ? 0 : margins.length;
        if (marginCount > 0)
        {
            setLeftMargin(margins[0]);
        }
        if (marginCount > 1)
        {
            setRightMargin(margins[1]);
        }
        if (marginCount > 2)
        {
            setTopMargin(margins[2]);
        }
        if (marginCount > 3)
        {
            setBottomMargin(margins[3]);
        }

        // FileInputStream doesn't support mark/reset, so the BOM is probed with a
        // separate stream and skipped on the stream that is actually decoded.
        boolean hasUtf8BOM = charset.equals(StandardCharsets.UTF_8) && startsWithUtf8Bom(infile);

        try (InputStream is = new FileInputStream(infile))
        {
            if (hasUtf8BOM)
            {
                long skipped = is.skip(3);
                if (skipped != 3)
                {
                    throw new IOException("Could not skip 3 bytes, size changed?!");
                }
            }
            try (Reader reader = new InputStreamReader(is, charset))
            {
                createPDFFromText(doc, reader);
            }
        }
        doc.save(outfile);
    }
    catch (IOException ioe)
    {
        SYSERR.println( "Error converting text to PDF [" + ioe.getClass().getSimpleName() + "]: " + ioe.getMessage());
        return 4;
    }
    return 0;
}

/**
 * Tells whether the first three bytes of the file are the UTF-8 byte order mark (EF BB BF).
 *
 * @param file the file to probe.
 * @return true if the file starts with the UTF-8 byte order mark.
 * @throws IOException if the file cannot be opened or read.
 */
private static boolean startsWithUtf8Bom(File file) throws IOException
{
    try (InputStream is = new FileInputStream(file))
    {
        return is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF;
    }
}
/**
 * Create a new PDF document with some text. The caller owns the returned document
 * and is responsible for closing it.
 *
 * @param text The stream of text data.
 *
 * @return The document with the text in it.
 *
 * @throws IOException If there is an error writing the data. The partially built
 * document is closed before the exception is propagated.
 */
public PDDocument createPDFFromText( Reader text ) throws IOException
{
    PDDocument doc = new PDDocument();
    try
    {
        createPDFFromText(doc, text);
    }
    catch (IOException | RuntimeException e)
    {
        // The document never reaches the caller on this path, so it has to be
        // released here; a failure to close must not hide the original problem.
        try
        {
            doc.close();
        }
        catch (IOException | RuntimeException closeFailure)
        {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    return doc;
}
/**
 * Create a PDF document with some text.
 * <p>
 * The text is read line by line. Each input line is split at single spaces and the words
 * are packed into output lines for as long as the measured width stays below the page
 * width minus the left and right margins. A new page is started when the next line would
 * fall below the bottom margin and whenever a form feed character is found. If the input
 * contains no line at all, a single empty page is added.
 * <p>
 * The reader is wrapped in a BufferedReader but is not closed by this method.
 *
 * @param doc The document.
 * @param text The stream of text data.
 *
 * @throws IOException If there is an error writing the data. A content stream that is
 * still open at that point is closed before the exception is propagated.
 */
public void createPDFFromText( PDDocument doc, Reader text ) throws IOException
{
    // for some reason the font isn't initialized -> use default font
    if (font == null)
    {
        font = new PDType1Font(standardFont);
    }
    float fontHeight = font.getBoundingBox().getHeight() / FONTSCALE;
    PDRectangle actualMediaBox =
            landscape ? new PDRectangle(mediaBox.getHeight(), mediaBox.getWidth()) : mediaBox;

    // calculate line height and increase by a factor.
    float lineHeight = fontHeight * fontSize * lineSpacing;
    BufferedReader data = new BufferedReader(text);

    // Only used to read the page metrics and as the single page of an empty document;
    // pages holding text are created on demand below.
    PDPage firstPage = new PDPage(actualMediaBox);
    float maxStringLength = firstPage.getMediaBox().getWidth() - leftMargin - rightMargin;

    // Text cursor position at the start of every page (all pages share the media box).
    float pageStartY = firstPage.getMediaBox().getHeight() - topMargin;
    pageStartY += lineHeight - fontHeight * fontSize; // adjust for lineSpacing != 1

    PDPageContentStream contentStream = null;
    float y = -1;

    // There is a special case of creating a PDF document from an empty string.
    boolean textIsEmpty = true;

    StringBuilder nextLineToDraw = new StringBuilder();
    try
    {
        String nextLine;
        while ((nextLine = data.readLine()) != null)
        {
            // The input text is nonEmpty. New pages will be created and added
            // to the PDF document as they are needed, depending on the length of
            // the text.
            textIsEmpty = false;

            // readLine() never returns the line terminator, so the line can be split as is.
            String[] lineWords = nextLine.split(" ", -1);
            int lineIndex = 0;
            while (lineIndex < lineWords.length)
            {
                nextLineToDraw.setLength(0);
                boolean addSpace = false;
                float lengthIfUsingNextWord = 0;
                boolean ff = false;
                do
                {
                    String word1;
                    String word2 = "";
                    String word = lineWords[lineIndex];
                    int indexFF = word.indexOf('\f');
                    if (indexFF == -1)
                    {
                        word1 = word;
                    }
                    else
                    {
                        ff = true;
                        word1 = word.substring(0, indexFF);
                        // indexFF is always a valid index, so the remainder may be empty
                        // but the substring call cannot fail
                        word2 = word.substring(indexFF + 1);
                    }
                    // word1 is the part before ff, word2 after
                    // both can be empty
                    // word1 can also be empty without ff, if a line has many spaces
                    if (!word1.isEmpty() || !ff)
                    {
                        if (addSpace)
                        {
                            nextLineToDraw.append(' ');
                        }
                        else
                        {
                            addSpace = true;
                        }
                        nextLineToDraw.append(word1);
                    }
                    if (!ff || word2.isEmpty())
                    {
                        lineIndex++;
                    }
                    else
                    {
                        // the part after the form feed is handled in the next round
                        lineWords[lineIndex] = word2;
                    }
                    if (ff)
                    {
                        break;
                    }
                    if (lineIndex < lineWords.length)
                    {
                        // need cut off at \f in next word to avoid IllegalArgumentException
                        String nextWord = lineWords[lineIndex];
                        indexFF = nextWord.indexOf('\f');
                        if (indexFF != -1)
                        {
                            nextWord = nextWord.substring(0, indexFF);
                        }

                        String lineWithNextWord = nextLineToDraw + " " + nextWord;
                        lengthIfUsingNextWord
                                = (font.getStringWidth(lineWithNextWord) / FONTSCALE) * fontSize;
                    }
                }
                while (lineIndex < lineWords.length && lengthIfUsingNextWord < maxStringLength);

                if (y - lineHeight < bottomMargin)
                {
                    // We have crossed the end-of-page boundary and need to extend the
                    // document by another page.
                    PDPageContentStream finished = contentStream;
                    contentStream = null;
                    endPageText(finished);
                    contentStream = beginPageText(doc, actualMediaBox, pageStartY);
                    y = pageStartY;
                }

                if (contentStream == null)
                {
                    throw new IOException("Error:Expected non-null content stream.");
                }
                contentStream.newLineAtOffset(0, -lineHeight);
                y -= lineHeight;
                contentStream.showText(nextLineToDraw.toString());
                if (ff)
                {
                    // a form feed forces the rest of the text onto a fresh page
                    PDPageContentStream finished = contentStream;
                    contentStream = null;
                    endPageText(finished);
                    contentStream = beginPageText(doc, actualMediaBox, pageStartY);
                    y = pageStartY;
                }
            }
        }

        // If the input text was the empty string, then the above while loop will have short-circuited
        // and we will not have added any PDPages to the document.
        // So in order to make the resultant PDF document readable by Adobe Reader etc, we'll add an empty page.
        if (textIsEmpty)
        {
            doc.addPage(firstPage);
        }

        PDPageContentStream last = contentStream;
        contentStream = null;
        endPageText(last);
    }
    catch (IOException | RuntimeException e)
    {
        // contentStream is only non-null here if it is still open; release it without
        // hiding the exception that interrupted the layout.
        if (contentStream != null)
        {
            try
            {
                contentStream.close();
            }
            catch (IOException | RuntimeException closeFailure)
            {
                e.addSuppressed(closeFailure);
            }
        }
        throw e;
    }
}

/**
 * Adds a new page to the document and opens its content stream in text mode, with the
 * current font selected and the text cursor placed at the left margin.
 *
 * @param doc the document receiving the page.
 * @param pageBox the media box of the new page.
 * @param startY the vertical position of the text cursor.
 * @return the open content stream; the caller has to end the text and close it.
 * @throws IOException if the content stream cannot be created or written.
 */
private PDPageContentStream beginPageText(PDDocument doc, PDRectangle pageBox, float startY)
        throws IOException
{
    PDPage page = new PDPage(pageBox);
    doc.addPage(page);
    PDPageContentStream stream = new PDPageContentStream(doc, page);
    try
    {
        stream.setFont(font, fontSize);
        stream.beginText();
        stream.newLineAtOffset(leftMargin, startY);
    }
    catch (IOException | RuntimeException e)
    {
        try
        {
            stream.close();
        }
        catch (IOException | RuntimeException closeFailure)
        {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    return stream;
}

/**
 * Ends the text block of a page and closes its content stream; does nothing for null.
 *
 * @param stream the content stream to finish, may be null.
 * @throws IOException if ending the text or closing the stream fails.
 */
private static void endPageText(PDPageContentStream stream) throws IOException
{
    if (stream == null)
    {
        return;
    }
    try
    {
        stream.endText();
    }
    finally
    {
        stream.close();
    }
}
/**
 * Gives access to the font used for drawing the text.
 *
 * @return the current font, or null if none has been set yet.
 */
public PDFont getFont()
{
    return this.font;
}
/**
 * Chooses the font used for drawing the text. If it is null when the PDF is created,
 * the configured standard font is used instead.
 *
 * @param aFont The font to set.
 */
public void setFont(PDFont aFont)
{
    font = aFont;
}
/**
 * Gives the size at which the text is drawn.
 *
 * @return the current font size.
 */
public float getFontSize()
{
    return this.fontSize;
}
/**
 * Changes the size at which the text is drawn. The value is stored without validation.
 *
 * @param aFontSize The fontSize to set.
 */
public void setFontSize(float aFontSize)
{
    fontSize = aFontSize;
}
/**
 * Gives the factor by which the font height is multiplied to obtain the line height.
 *
 * @return the current line spacing factor.
 */
public float getLineSpacing()
{
    return this.lineSpacing;
}
/**
 * Sets the factor by which the font height is multiplied to obtain the line height.
 *
 * @param lineSpacing The lineSpacing to set; must be a finite number greater than zero.
 * @throws IllegalArgumentException if the value is NaN, infinite, zero or negative.
 */
public void setLineSpacing(float lineSpacing)
{
    // NaN compares false against everything, so it would slip through a plain "<= 0"
    // check; NaN and infinite values both make the page layout arithmetic meaningless.
    if (Float.isNaN(lineSpacing) || Float.isInfinite(lineSpacing))
    {
        throw new IllegalArgumentException("line spacing must be a finite number: " + lineSpacing);
    }
    if (lineSpacing <= 0)
    {
        throw new IllegalArgumentException("line spacing must be positive: " + lineSpacing);
    }
    this.lineSpacing = lineSpacing;
}
/**
 * Gives the distance kept free between the left page edge and the text.
 *
 * @return the left margin.
 */
public float getLeftMargin()
{
    return this.leftMargin;
}
/**
* Sets the left margin. It is the horizontal position at which every text line starts
* and is subtracted from the page width when the available line width is computed.
* The value is stored without validation.
*
* @param leftMargin The left margin to be set.
*/
public void setLeftMargin(float leftMargin)
{
this.leftMargin = leftMargin;
}
/**
 * Gives the distance kept free between the text and the right page edge.
 *
 * @return the right margin.
 */
public float getRightMargin()
{
    return this.rightMargin;
}
/**
* Sets the right margin. It is subtracted from the page width when the available
* line width is computed. The value is stored without validation.
*
* @param rightMargin The right margin to be set.
*/
public void setRightMargin(float rightMargin)
{
this.rightMargin = rightMargin;
}
/**
 * Gives the distance kept free between the top page edge and the text.
 *
 * @return the top margin.
 */
public float getTopMargin()
{
    return this.topMargin;
}
/**
* Sets the top margin. It is subtracted from the page height to position the text
* cursor at the start of each page. The value is stored without validation.
*
* @param topMargin The top margin to be set.
*/
public void setTopMargin(float topMargin)
{
this.topMargin = topMargin;
}
/**
 * Gives the distance kept free between the text and the bottom page edge.
 *
 * @return the bottom margin.
 */
public float getBottomMargin()
{
    return this.bottomMargin;
}
/**
* Sets the bottom margin. A new page is started when the next text line would be
* placed below it. The value is stored without validation.
*
* @param bottomMargin The bottom margin to be set.
*/
public void setBottomMargin(float bottomMargin)
{
this.bottomMargin = bottomMargin;
}
/**
 * Returns the page size of the produced PDF, before any landscape rotation is applied.
 *
 * @return returns the page size (media box)
 */
public PDRectangle getMediaBox()
{
    return this.mediaBox;
}
/**
* Sets page size of produced PDF. When landscape orientation is enabled, the width and
* height of this rectangle are swapped for the generated pages.
*
* @param mediaBox the page size (media box) to use for the generated pages
*/
public void setMediaBox(PDRectangle mediaBox)
{
this.mediaBox = mediaBox;
}
/**
 * Tells whether the pages are produced in landscape orientation.
 *
 * @return true for landscape orientation
 */
public boolean isLandscape()
{
    return this.landscape;
}
/**
* Sets paper orientation. In landscape orientation the pages are created with the
* width and height of the media box swapped.
*
* @param landscape true for landscape orientation
*/
public void setLandscape(boolean landscape)
{
this.landscape = landscape;
}
}