AcroFormOrphanWidgetsProcessor.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.fixup.processor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.FontMapper;
import org.apache.pdfbox.pdmodel.font.FontMappers;
import org.apache.pdfbox.pdmodel.font.FontMapping;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDFieldFactory;
import org.apache.pdfbox.pdmodel.interactive.form.PDVariableText;

/**
 *  Generate field entries from page level widget annotations
 *  if the AcroForm /Fields entry is empty.
 * 
 */
public class AcroFormOrphanWidgetsProcessor extends AbstractProcessor
{
    
    private static final Logger LOG = LogManager.getLogger(AcroFormOrphanWidgetsProcessor.class);

    /**
     * Creates a processor working on the given document.
     *
     * @param document the document whose AcroForm fields shall be rebuilt.
     */
    public AcroFormOrphanWidgetsProcessor(PDDocument document)
    { 
        super(document); 
    }

    /**
     * Rebuilds the AcroForm fields from the widget annotations of all pages. Does nothing if the
     * document has no AcroForm.
     */
    @Override
    public void process()
    {
        /*
         * Get the AcroForm in its current state.
         *
         * Also note: getAcroForm() applies a default fixup which this processor
         * is part of. So keep the null parameter otherwise this will end
         * in an endless recursive call
         */
        PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(null);

        if (acroForm != null)
        {            
            resolveFieldsFromWidgets(acroForm);
        } 
    }

    /**
     * Collects fields from the widget annotations of every page, sets them as the AcroForm
     * fields and makes sure the fonts referenced by variable text fields are available in the
     * AcroForm default resources.
     *
     * @param acroForm the AcroForm to rebuild the fields for, should not be null.
     */
    private void resolveFieldsFromWidgets(PDAcroForm acroForm)
    {
        LOG.debug("rebuilding fields from widgets");

        PDResources resources = acroForm.getDefaultResources();
        if (resources == null)
        {
            // failsafe. Currently resources is never null because defaultfixup is called first.
            LOG.debug("AcroForm default resources is null");
            return;
        }

        List<PDField> fields = new ArrayList<>();
        Map<String, PDField> nonTerminalFieldsMap = new HashMap<>();
        for (PDPage page : document.getPages())
        {
            try
            {
                handleAnnotations(acroForm, resources, fields, page.getAnnotations(), nonTerminalFieldsMap);
            }
            catch (IOException ioe)
            {
                // best effort: skip the page but keep the cause available in the debug log
                LOG.debug("couldn't read annotations for page {}", ioe.getMessage(), ioe);
            }
        }

        acroForm.setFields(fields);

        for (PDField field : acroForm.getFieldTree())
        {
            if (field instanceof PDVariableText)
            {
                ensureFontResources(resources, (PDVariableText) field);
            }
        }
    }

    /**
     * Creates fields for the widget annotations among the given annotations and appends them to
     * the collected fields. Non widget annotations are skipped.
     *
     * @param acroForm the AcroForm the fields are created for.
     * @param acroFormResources AcroForm default resources, should not be null.
     * @param fields the list the created root level fields are appended to.
     * @param annotations the annotations of a single page.
     * @param nonTerminalFieldsMap root fields already created from widgets having a /Parent
     * entry, keyed by field name.
     */
    private void handleAnnotations(PDAcroForm acroForm, PDResources acroFormResources,
            List<PDField> fields, List<PDAnnotation> annotations,
            Map<String, PDField> nonTerminalFieldsMap)
    {
        for (PDAnnotation annot : annotations)
        {
            if (!(annot instanceof PDAnnotationWidget))
            {
                continue;
            }

            addFontFromWidget(acroFormResources, annot);

            COSDictionary parent = annot.getCOSObject().getCOSDictionary(COSName.PARENT);
            // a widget without /Parent is the field itself, otherwise the root of the
            // hierarchy is the entry to add
            PDField field = parent != null
                    ? resolveNonRootField(acroForm, parent, nonTerminalFieldsMap)
                    : PDFieldFactory.createField(acroForm, annot.getCOSObject(), null);
            if (field != null)
            {
                fields.add(field);
            }
        }
    }

    /**
     * Add font resources from the widget to the AcroForm to make sure embedded fonts are being used
     * and not added by ensureFontResources potentially using a fallback font.
     * 
     * @param acroFormResources AcroForm default resources, should not be null.
     * @param annotation annotation, should not be null.
     */
    private void addFontFromWidget(PDResources acroFormResources, PDAnnotation annotation)
    {
        PDAppearanceStream normalAppearanceStream = annotation.getNormalAppearanceStream();
        if (normalAppearanceStream == null)
        {
            return;
        }
        PDResources widgetResources = normalAppearanceStream.getResources();
        if (widgetResources == null)
        {
            return;
        }
        widgetResources.getFontNames().forEach(fontName ->
        {
            if (!fontName.getName().startsWith("+"))
            {
                try
                {
                    if (acroFormResources.getFont(fontName) == null)
                    {
                        acroFormResources.put(fontName, widgetResources.getFont(fontName));
                        LOG.debug("added font resource to AcroForm from widget for font name {}",
                                fontName.getName());
                    }
                }
                catch (IOException ioe)
                {
                    // best effort: the font is skipped, the cause is kept in the debug log
                    LOG.debug("unable to add font to AcroForm for font name {}",
                            fontName.getName(), ioe);
                }
            }
            else
            {
                LOG.debug("font resource for widget was a subsetted font - ignored: {}",
                        fontName.getName());
            }
        });
    }

    /*
     *  Widgets having a /Parent entry are non root fields. Go up until the root node is found
     *  and handle from there.
     *
     *  Returns the newly created root field, or null if the hierarchy is broken (missing or
     *  cyclic /Parent entries), if no field could be created or if the root field has already
     *  been registered in nonTerminalFieldsMap by an earlier widget.
     */
    private PDField resolveNonRootField(PDAcroForm acroForm, COSDictionary parent, Map<String, PDField> nonTerminalFieldsMap)
    {
        // identity based set of the dictionaries already seen to guard against a cyclic
        // /Parent chain in a malformed PDF which would otherwise never terminate
        Set<COSDictionary> visited = Collections.newSetFromMap(new IdentityHashMap<>());
        visited.add(parent);

        while (parent.containsKey(COSName.PARENT))
        {
            parent = parent.getCOSDictionary(COSName.PARENT);
            if (parent == null)
            {
                return null;
            }
            if (!visited.add(parent))
            {
                LOG.debug("cyclic /Parent chain detected while resolving root field - ignored");
                return null;
            }
        }
        
        if (nonTerminalFieldsMap.get(parent.getString(COSName.T)) == null)
        {
            PDField field = PDFieldFactory.createField(acroForm, parent, null);
            if (field != null)
            {
                nonTerminalFieldsMap.put(field.getFullyQualifiedName(), field);
            }
            return field;
        }

        // a root field with this name has already been registered by an earlier widget,
        // return null so the caller doesn't add it a second time
        return null;
    }


    /*
     *  Lookup the font used in the default appearance and if this is 
     *  not available try to find a suitable font and use that.
     *  This may not be the original font but a similar font replacement
     * 
     *  Fields without a default appearance or with a default appearance not starting
     *  with a font name are left untouched.
     * 
     *  TODO: implement a font lookup similar as discussed in PDFBOX-2661 so that already existing
     *        font resources might be acceptable.
     *        In such case this must be implemented in PDDefaultAppearanceString too!
     */
    private void ensureFontResources(PDResources defaultResources, PDVariableText field)
    {
        String fontResourceName = extractFontName(field.getDefaultAppearance());
        if (fontResourceName == null)
        {
            return;
        }

        COSName fontName = COSName.getPDFName(fontResourceName);
        try
        {
            if (defaultResources.getFont(fontName) == null)
            {
                LOG.debug("trying to add missing font resource for field {}",
                        field.getFullyQualifiedName());
                FontMapper mapper = FontMappers.instance();
                FontMapping<TrueTypeFont> fontMapping = mapper.getTrueTypeFont(fontName.getName() , null);
                if (fontMapping != null)
                {
                    PDType0Font pdFont = PDType0Font.load(document, fontMapping.getFont(), false);
                    LOG.debug("looked up font for {} - found {}", fontName.getName(),
                            fontMapping.getFont().getName());
                    defaultResources.put(fontName, pdFont);
                }
                else
                {
                    LOG.debug("no suitable font found for field {} for font name {}",
                            field.getFullyQualifiedName(), fontName.getName());
                }
            }
        }
        catch (IOException ioe)
        {
            LOG.debug("unable to handle font resources for field {}: {}",
                    field.getFullyQualifiedName(), ioe.getMessage(), ioe);
        }
    }

    /**
     * Extracts the font resource name from the start of a default appearance string such as
     * <code>/Helv 0 Tf 0 g</code>.
     * <p>
     * The name is the text between the leading slash and the first whitespace character. If
     * there is no whitespace the remainder of the string is used, so a default appearance
     * consisting of the font name only no longer leads to a StringIndexOutOfBoundsException.
     *
     * @param defaultAppearance the default appearance string, may be null.
     * @return the font resource name without the leading slash or null if the string is null,
     * doesn't start with a slash or the name is empty.
     */
    private static String extractFontName(String defaultAppearance)
    {
        if (defaultAppearance == null || !defaultAppearance.startsWith("/"))
        {
            return null;
        }
        int end = 1;
        while (end < defaultAppearance.length()
                && !Character.isWhitespace(defaultAppearance.charAt(end)))
        {
            end++;
        }
        return end > 1 ? defaultAppearance.substring(1, end) : null;
    }
}