// PDAcroFormFromAnnotsTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.interactive.form;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.fixup.AbstractFixup;
import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup;
import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormOrphanWidgetsProcessor;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.junit.jupiter.api.Test;
/**
 * Tests for building AcroForm entries from Widget annotations.
 *
 * <p>
 * The test documents are fetched from the Apache JIRA attachment URLs declared below, so running
 * these tests requires network access.
 */
class PDAcroFormFromAnnotsTest
{
    /** PDFBOX-4985: document with an AcroForm entry but an empty Fields array. */
    private static final String POPPLER_806_URL =
            "https://issues.apache.org/jira/secure/attachment/13013354/POPPLER-806.pdf";

    /** PDFBOX-4985: Acrobat variant of the document, used as the reference for the field count. */
    private static final String POPPLER_806_ACROBAT_URL =
            "https://issues.apache.org/jira/secure/attachment/13013384/POPPLER-806-acrobat.pdf";

    /** PDFBOX-3891: document with an AcroForm that has an empty Fields entry. */
    private static final String MERGE_TEST_URL =
            "https://issues.apache.org/jira/secure/attachment/12881055/merge-test.pdf";

    /** PDFBOX-3891: Acrobat variant of the document, used as the reference for fields and fonts. */
    private static final String MERGE_TEST_ACROBAT_URL =
            "https://issues.apache.org/jira/secure/attachment/13014447/merge-test-na-acrobat.pdf";

    /** PDFBOX-3891: document used to check that getting the AcroForm does not throw. */
    private static final String POPPLER_14433_URL =
            "https://issues.apache.org/jira/secure/attachment/13016993/poppler-14433-0.pdf";

    /**
     * PDFBOX-4985 AcroForms entry but empty Fields array
     *
     * Using the default get AcroForm call with error correction
     *
     * @throws IOException if a document can't be downloaded or parsed
     * @throws URISyntaxException if a document URL is malformed
     */
    @Test
    void testFromAnnots4985DefaultMode() throws IOException, URISyntaxException
    {
        int numFormFieldsByAcrobat = countReferenceFields(POPPLER_806_ACROBAT_URL);

        try (PDDocument testPdf = loadFromUrl(POPPLER_806_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            assertEquals(0, countRawFields(catalog), "Initially there shall be 0 fields");

            PDAcroForm acroForm = catalog.getAcroForm();
            assertEquals(numFormFieldsByAcrobat, acroForm.getFields().size(),
                    "After rebuild there shall be " + numFormFieldsByAcrobat + " fields");
        }
    }

    /**
     * PDFBOX-4985 AcroForms entry but empty Fields array
     *
     * Using the acroform call with error correction
     *
     * @throws IOException if a document can't be downloaded or parsed
     * @throws URISyntaxException if a document URL is malformed
     */
    @Test
    void testFromAnnots4985CorrectionMode() throws IOException, URISyntaxException
    {
        int numFormFieldsByAcrobat = countReferenceFields(POPPLER_806_ACROBAT_URL);

        try (PDDocument testPdf = loadFromUrl(POPPLER_806_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            assertEquals(0, countRawFields(catalog), "Initially there shall be 0 fields");

            PDAcroForm acroForm = catalog.getAcroForm(new AcroFormDefaultFixup(testPdf));
            assertEquals(numFormFieldsByAcrobat, acroForm.getFields().size(),
                    "After rebuild there shall be " + numFormFieldsByAcrobat + " fields");
        }
    }

    /**
     * PDFBOX-4985 AcroForms entry but empty Fields array
     *
     * Using the acroform call without error correction
     *
     * @throws IOException if the document can't be downloaded or parsed
     * @throws URISyntaxException if the document URL is malformed
     */
    @Test
    void testFromAnnots4985WithoutCorrectionMode() throws IOException, URISyntaxException
    {
        try (PDDocument testPdf = loadFromUrl(POPPLER_806_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            int numCosFormFields = countRawFields(catalog);
            assertEquals(0, numCosFormFields, "Initially there shall be 0 fields");

            // without a fixup the PDModel view has to mirror the raw COS content
            PDAcroForm acroForm = catalog.getAcroForm(null);
            assertEquals(numCosFormFields, acroForm.getFields().size(),
                    "After call without correction there shall be " + numCosFormFields + " fields");
        }
    }

    /**
     * PDFBOX-3891 AcroForm with empty fields entry
     *
     * With the default correction nothing shall be added
     *
     * @throws IOException if the document can't be downloaded or parsed
     * @throws URISyntaxException if the document URL is malformed
     */
    @Test
    void testFromAnnots3891DontCreateFields() throws IOException, URISyntaxException
    {
        try (PDDocument testPdf = loadFromUrl(MERGE_TEST_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            assertEquals(0, countRawFields(catalog), "Initially there shall be 0 fields");

            PDAcroForm acroForm = catalog.getAcroForm();
            assertEquals(0, acroForm.getFields().size(),
                    "After call with default correction there shall be 0 fields");
        }
    }

    /**
     * PDFBOX-3891 AcroForm with empty fields entry
     *
     * Special fixup to create fields
     *
     * @throws IOException if a document can't be downloaded or parsed
     * @throws URISyntaxException if a document URL is malformed
     */
    @Test
    void testFromAnnots3891CreateFields() throws IOException, URISyntaxException
    {
        int numFormFieldsByAcrobat;
        // will build the expected fields using the acrobat source document
        Map<String, PDField> fieldsByName = new HashMap<>();
        try (PDDocument referencePdf = loadFromUrl(MERGE_TEST_ACROBAT_URL))
        {
            PDAcroForm acroForm = referencePdf.getDocumentCatalog().getAcroForm(null);
            numFormFieldsByAcrobat = acroForm.getFields().size();
            for (PDField field : acroForm.getFieldTree())
            {
                fieldsByName.put(field.getFullyQualifiedName(), field);
            }
        }

        try (PDDocument testPdf = loadFromUrl(MERGE_TEST_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            assertEquals(0, countRawFields(catalog), "Initially there shall be 0 fields");

            PDAcroForm acroForm = catalog.getAcroForm(new CreateFieldsFixup(testPdf));
            assertEquals(numFormFieldsByAcrobat, acroForm.getFields().size(),
                    "After rebuild there shall be " + numFormFieldsByAcrobat + " fields");

            // test the fields found are contained in the map
            for (PDField field : acroForm.getFieldTree())
            {
                String fieldName = field.getFullyQualifiedName();
                assertNotNull(fieldsByName.get(fieldName),
                        "Field " + fieldName + " shall also exist in the Acrobat document");
            }

            // test all fields in the map are also found in the AcroForm
            for (String fieldName : fieldsByName.keySet())
            {
                assertNotNull(acroForm.getField(fieldName),
                        "Field " + fieldName + " shall have been created by the fixup");
            }
        }
    }

    /**
     * PDFBOX-3891 AcroForm with empty fields entry
     *
     * Check if the font resources added by PDFBox matches these by Acrobat
     * which are taken from the widget normal appearance resources
     *
     * @throws IOException if a document can't be downloaded or parsed, or a font can't be loaded
     * @throws URISyntaxException if a document URL is malformed
     */
    @Test
    void testFromAnnots3891ValidateFont() throws IOException, URISyntaxException
    {
        // will build the expected font resource names and font descriptor names
        // using the acrobat source document
        Map<String, String> fontNames = new HashMap<>();
        try (PDDocument referencePdf = loadFromUrl(MERGE_TEST_ACROBAT_URL))
        {
            PDAcroForm acroForm = referencePdf.getDocumentCatalog().getAcroForm(null);
            PDResources acroFormResources = acroForm.getDefaultResources();
            if (acroFormResources != null)
            {
                // a plain loop instead of forEach() so that an IOException fails the test
                // instead of being silently dropped
                for (COSName fontName : acroFormResources.getFontNames())
                {
                    PDFont font = acroFormResources.getFont(fontName);
                    // store the descriptor font name as this is what is compared below
                    fontNames.put(fontName.getName(), font.getFontDescriptor().getFontName());
                }
            }
        }

        try (PDDocument testPdf = loadFromUrl(MERGE_TEST_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            PDAcroForm acroForm = catalog.getAcroForm(new CreateFieldsFixup(testPdf));
            PDResources acroFormResources = acroForm.getDefaultResources();
            if (acroFormResources != null)
            {
                for (COSName fontName : acroFormResources.getFontNames())
                {
                    PDFont font = acroFormResources.getFont(fontName);
                    String pdfBoxFontName = font.getFontDescriptor().getFontName();
                    assertEquals(fontNames.get(fontName.getName()), pdfBoxFontName,
                            "font resource added by Acrobat shall match font resource added by PDFBox");
                }
            }
        }
    }

    /**
     * PDFBOX-3891 null PDFieldFactory.createField
     *
     * @throws IOException if the document can't be downloaded or parsed
     * @throws URISyntaxException if the document URL is malformed
     */
    @Test
    void testFromAnnots3891NullField() throws IOException, URISyntaxException
    {
        try (PDDocument testPdf = loadFromUrl(POPPLER_14433_URL))
        {
            PDDocumentCatalog catalog = testPdf.getDocumentCatalog();
            assertDoesNotThrow(() -> catalog.getAcroForm(new CreateFieldsFixup(testPdf)),
                    "Getting the AcroForm shall not throw an exception");
        }
    }

    /**
     * Downloads and parses the PDF at the given URL.
     *
     * The network stream is closed as soon as its content has been copied into the read buffer;
     * the caller is responsible for closing the returned document.
     *
     * @param url the location of the PDF
     * @return the loaded document
     * @throws IOException if the document can't be downloaded or parsed
     * @throws URISyntaxException if the URL is malformed
     */
    private static PDDocument loadFromUrl(String url) throws IOException, URISyntaxException
    {
        try (InputStream is = new URI(url).toURL().openStream())
        {
            return Loader.loadPDF(RandomAccessReadBuffer.createBufferFromStream(is));
        }
    }

    /**
     * Returns the number of top level fields of the document at the given URL, reading the
     * AcroForm without any fixup being applied.
     *
     * @param url the location of the reference PDF
     * @return the number of entries returned by {@link PDAcroForm#getFields()}
     * @throws IOException if the document can't be downloaded or parsed
     * @throws URISyntaxException if the URL is malformed
     */
    private static int countReferenceFields(String url) throws IOException, URISyntaxException
    {
        try (PDDocument referencePdf = loadFromUrl(url))
        {
            PDAcroForm acroForm = referencePdf.getDocumentCatalog().getAcroForm(null);
            return acroForm.getFields().size();
        }
    }

    /**
     * Returns the size of the AcroForm Fields array using low level COS access.
     *
     * The low level access is needed as the PDModel access will build the AcroForm.
     *
     * @param catalog the document catalog to inspect
     * @return the number of entries in the Fields array
     */
    private static int countRawFields(PDDocumentCatalog catalog)
    {
        COSDictionary cosAcroForm = (COSDictionary) catalog.getCOSObject()
                .getDictionaryObject(COSName.ACRO_FORM);
        assertNotNull(cosAcroForm, "The document shall have an AcroForm entry");
        COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS);
        assertNotNull(cosFields, "The AcroForm shall have a Fields array");
        return cosFields.size();
    }

    /**
     * Create fields from widget annotations by running the
     * {@link AcroFormOrphanWidgetsProcessor} on the document.
     *
     * Declared static as it doesn't need any state of the enclosing test instance.
     */
    static class CreateFieldsFixup extends AbstractFixup
    {
        CreateFieldsFixup(PDDocument document)
        {
            super(document);
        }

        @Override
        public void apply()
        {
            new AcroFormOrphanWidgetsProcessor(document).process();
        }
    }
}