ExtractTTFFonts.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.examples.pdmodel;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDSoftMask;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
/**
* This will extract all true type-fonts of a pdf.
*
*/
public final class ExtractTTFFonts
{
private int fontCounter = 1;
private final Set<COSDictionary> fontSet = new HashSet<>();
private int currentPage;
@SuppressWarnings({"squid:S2068"})
private static final String PASSWORD = "-password";
private static final String PREFIX = "-prefix";
private static final String ADDKEY = "-addkey";
private ExtractTTFFonts()
{
}
/**
* This is the entry point for the application.
*
* @param args The command-line arguments.
*
* @throws IOException If there is an error decrypting the document.
*/
public static void main(String[] args) throws IOException
{
ExtractTTFFonts extractor = new ExtractTTFFonts();
extractor.extractFonts(args);
}
private void extractFonts(String[] args) throws IOException
{
if (args.length < 1 || args.length > 4)
{
usage();
}
else
{
String pdfFile = null;
@SuppressWarnings({"squid:S2068"})
String password = "";
String prefix = null;
boolean addKey = false;
for (int i = 0; i < args.length; i++)
{
switch (args[i])
{
case PASSWORD:
i++;
if (i >= args.length)
{
usage();
}
password = args[i];
break;
case PREFIX:
i++;
if (i >= args.length)
{
usage();
}
prefix = args[i];
break;
case ADDKEY:
addKey = true;
break;
default:
if (pdfFile == null)
{
pdfFile = args[i];
}
break;
}
}
if (pdfFile == null)
{
usage();
}
else
{
if (prefix == null && pdfFile.length() > 4)
{
prefix = pdfFile.substring(0, pdfFile.length() - 4);
}
try (PDDocument document = Loader.loadPDF(new File(pdfFile), password))
{
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
if (acroForm != null)
{
processResources(acroForm.getDefaultResources(), prefix, addKey);
}
PDPageTree pageTree = document.getPages();
for (PDPage page : pageTree)
{
currentPage = pageTree.indexOf(page) + 1;
// extract all fonts which are part of the page resources
processResources(page.getResources(), prefix, addKey);
for (PDAnnotation ann : page.getAnnotations())
{
PDAppearanceStream nas = ann.getNormalAppearanceStream();
if (nas != null)
{
processResources(nas.getResources(), prefix, addKey);
}
PDAppearanceDictionary appearance = ann.getAppearance();
if (appearance != null)
{
PDAppearanceEntry nae = appearance.getNormalAppearance();
if (nae != null && nae.isStream())
{
nas = nae.getAppearanceStream();
processResources(nas.getResources(), prefix, addKey);
}
else if (nae != null && nae.isSubDictionary())
{
Map<COSName, PDAppearanceStream> subDic = nae.getSubDictionary();
for (PDAppearanceStream as : subDic.values())
{
processResources(as.getResources(), prefix, addKey);
}
}
}
}
}
}
}
}
}
private void processResources(PDResources resources, String prefix, boolean addKey) throws IOException
{
if (resources == null)
{
return;
}
processResourcesFonts(resources, addKey, prefix);
processNestedResources(resources, prefix, addKey);
}
private void processResourcesFonts(PDResources resources, boolean addKey, String prefix) throws IOException
{
for (COSName key : resources.getFontNames())
{
PDFont font = resources.getFont(key);
if (font == null)
{
continue;
}
System.out.println((font.getName() == null ? "(null)" : font.getName()) +
" on page " + currentPage);
if (fontSet.contains(font.getCOSObject()))
{
continue;
}
fontSet.add(font.getCOSObject());
// write the font
if (font instanceof PDTrueTypeFont)
{
String name;
if (addKey)
{
name = getUniqueFileName(prefix + "_" + key, "ttf");
}
else
{
name = getUniqueFileName(prefix, "ttf");
}
writeFont(font.getFontDescriptor(), name);
}
else if (font instanceof PDType0Font)
{
PDCIDFont descendantFont = ((PDType0Font) font).getDescendantFont();
if (descendantFont instanceof PDCIDFontType2)
{
String name;
if (addKey)
{
name = getUniqueFileName(prefix + "_" + key, "ttf");
}
else
{
name = getUniqueFileName(prefix, "ttf");
}
writeFont(descendantFont.getFontDescriptor(), name);
}
}
}
}
private void processNestedResources(PDResources resources, String prefix, boolean addKey)
throws IOException
{
for (COSName name : resources.getXObjectNames())
{
PDXObject xobject = resources.getXObject(name);
if (xobject instanceof PDFormXObject)
{
PDFormXObject xObjectForm = (PDFormXObject) xobject;
processResources(xObjectForm.getResources(), prefix, addKey);
}
}
for (COSName name : resources.getPatternNames())
{
PDAbstractPattern pattern = resources.getPattern(name);
if (pattern instanceof PDTilingPattern)
{
PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
processResources(tilingPattern.getResources(), prefix, addKey);
}
}
for (COSName name : resources.getExtGStateNames())
{
PDExtendedGraphicsState extGState = resources.getExtGState(name);
PDSoftMask softMask = extGState.getSoftMask();
if (softMask != null)
{
PDTransparencyGroup group = softMask.getGroup();
if (group != null)
{
processResources(group.getResources(), prefix, addKey);
}
}
}
}
private void writeFont(PDFontDescriptor fd, String name) throws IOException
{
if (fd != null)
{
PDStream ff2Stream = fd.getFontFile2();
if (ff2Stream != null)
{
System.out.println("Writing font: " + name);
try (OutputStream os = new FileOutputStream(name + ".ttf");
InputStream is = ff2Stream.createInputStream())
{
is.transferTo(os);
}
}
}
}
private String getUniqueFileName(String prefix, String suffix)
{
String uniqueName = null;
File f = null;
while (f == null || f.exists())
{
uniqueName = prefix + "-" + fontCounter;
f = new File(uniqueName + "." + suffix);
fontCounter++;
}
return uniqueName;
}
/**
* This will print the usage requirements and exit.
*/
private static void usage()
{
System.err.println("Usage: java " + ExtractTTFFonts.class.getName() + " [OPTIONS] <PDF file>\n"
+ " -password <password> Password to decrypt document\n"
+ " -prefix <font-prefix> Font prefix(default to pdf name)\n"
+ " -addkey add the internal font key to the file name\n"
+ " <PDF file> The PDF document to use\n");
System.exit(1);
}
}