PDAcroFormFlattenTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.interactive.form;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Stream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.rendering.TestPDFToImage;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
/**
* Tests flattening of different forms and compares the result with a rendering of the original.
*
* Some of the tests are currently disabled to not run within the CI environment
* as the test results need manual inspection. Enable as needed.
*
*/
@Execution(ExecutionMode.CONCURRENT)
class PDAcroFormFlattenTest
{
/** Directory receiving the downloaded sample PDFs and their reference renderings. */
private static final File IN_DIR = new File("target/test-output/flatten/in");

/** Directory receiving the flattened PDFs. */
private static final File OUT_DIR = new File("target/test-output/flatten/out");

/**
 * Creates the input and output directories before any test of this class runs.
 *
 * @throws IOException if a directory cannot be created or the path exists but is not a directory
 */
@BeforeAll
static void setUp() throws IOException
{
    // Files.createDirectories() is a no-op for an already existing directory and, unlike
    // File.mkdirs(), reports a failure by throwing instead of returning a flag that was ignored.
    Files.createDirectories(IN_DIR.toPath());
    Files.createDirectories(OUT_DIR.toPath());
}
/**
 * Flattens each listed sample form and compares the rendering of the flattened document with
 * the rendering of the downloaded original.
 *
 * Each CSV entry has the form "source URL,target file name". Entries that are commented out
 * are disabled for the reason given in the comment above them.
 *
 * @param sourceUrl URL the sample PDF is downloaded from
 * @param targetFileName file name used for the local copy of the sample and for the flattened result
 * @throws IOException propagated from {@code flattenAndCompare}
 * @throws URISyntaxException propagated from {@code flattenAndCompare}
 */
@ParameterizedTest
@CsvSource({
// PDFBOX-142 Filled template.
// disabled as there is a small difference which can not be seen visually
// "https://issues.apache.org/jira/secure/attachment/12742551/Testformular1.pdf,Testformular1.pdf",
// PDFBOX-563 Filled template.
// Disabled as there is a minimal difference which can not be seen visually on ci-builds
// "https://issues.apache.org/jira/secure/attachment/12425859/TestFax_56972.pdf,TestFax_56972.pdf",
// PDFBOX-2469 Empty template.
"https://issues.apache.org/jira/secure/attachment/12682897/FormI-9-English.pdf,FormI-9-English.pdf",
// PDFBOX-2469 Filled template.
// Disabled as there is a minimal difference which can not be seen visually, see PDFBOX-5133
// "https://issues.apache.org/jira/secure/attachment/12678455/testPDF_acroForm.pdf,testPDF_acroForm.pdf",
//PDFBOX-2586 Empty template.
"https://issues.apache.org/jira/secure/attachment/12689788/test.pdf,test-2586.pdf",
// PDFBOX-3083 Filled template rotated.
// disabled as there is a small difference which can not be seen visually
// "https://issues.apache.org/jira/secure/attachment/12770263/mypdf.pdf,mypdf.pdf",
// PDFBOX-3262 Hidden fields.
"https://issues.apache.org/jira/secure/attachment/12792007/hidden_fields.pdf,hidden_fields.pdf",
// PDFBOX-3396 Signed Document 1.
"https://issues.apache.org/jira/secure/attachment/12816014/Signed-Document-1.pdf,Signed-Document-1.pdf",
// PDFBOX-3396 Signed Document 2.
"https://issues.apache.org/jira/secure/attachment/12816016/Signed-Document-2.pdf,Signed-Document-2.pdf",
// PDFBOX-3396 Signed Document 3.
"https://issues.apache.org/jira/secure/attachment/12821307/Signed-Document-3.pdf,Signed-Document-3.pdf",
// PDFBOX-3396 Signed Document 4.
"https://issues.apache.org/jira/secure/attachment/12821308/Signed-Document-4.pdf,Signed-Document-4.pdf",
// PDFBOX-3587 Filled template.
// disabled as there is a small difference which can not be seen visually
// "https://issues.apache.org/jira/secure/attachment/12840280/OpenOfficeForm_filled.pdf,OpenOfficeForm_filled.pdf",
// PDFBOX-4157 Filled template.
// disabled as there is a small difference which can not be seen visually
// "https://issues.apache.org/jira/secure/attachment/12976553/PDFBOX-4157-filled.pdf,PDFBOX-4157-filled.pdf",
// PDFBOX-4172 Filled template.
// disabled as there is a minimal difference which can not be seen visually
// "https://issues.apache.org/jira/secure/attachment/12976552/PDFBOX-4172-filled.pdf,PDFBOX-4172-filled.pdf",
// PDFBOX-4615 Filled template.
// disabled as there is a minimal difference which can not be seen visually on ci-builds
// "https://issues.apache.org/jira/secure/attachment/12976452/resetboundingbox-filled.pdf,PDFBOX-4615-filled.pdf",
// PDFBOX-4693: page is not rotated, but the appearance stream is.
"https://issues.apache.org/jira/secure/attachment/12986337/stenotypeTest-3_rotate_no_flatten.pdf,PDFBOX-4693-filled.pdf",
// PDFBOX-4788: non-widget annotations are not to be removed on a page that has no widget
// annotations.
"https://issues.apache.org/jira/secure/attachment/12994791/flatten.pdf,PDFBOX-4788.pdf",
// PDFBOX-4955: appearance streams with forms that are not used.
"https://issues.apache.org/jira/secure/attachment/13011410/PDFBOX-4955.pdf,PDFBOX-4955.pdf",
// PDFBOX-4958 text and button with image.
// disabled as there is a minimal difference which can not be seen visually on ci-builds
// "https://issues.apache.org/jira/secure/attachment/13012242/PDFBOX-4958.pdf,PDFBOX-4958-flattened.pdf"
})
void testFlatten(String sourceUrl, String targetFileName) throws IOException, URISyntaxException
{
// all work is delegated so that the same steps are applied to every sample
flattenAndCompare(sourceUrl, targetFileName);
}
/**
 * Flattens a single field of a local test document and checks that exactly this field is
 * removed from the form.
 *
 * @throws IOException if the test document cannot be loaded or closed
 */
@Test
void flattenSingleField() throws IOException
{
    String filename = "src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/MultilineFields.pdf";
    // try-with-resources: the document is closed even when one of the assertions fails
    try (PDDocument document = Loader.loadPDF(new File(filename)))
    {
        PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
        int numFieldsBefore = acroForm.getFields().size();

        List<PDField> toBeFlattened = new ArrayList<>();
        PDTextField field = (PDTextField) acroForm.getField("AlignLeft-Filled");
        toBeFlattened.add(field);

        acroForm.flatten(toBeFlattened, false);

        // expected value first, actual value second
        assertEquals(numFieldsBefore - 1, acroForm.getFields().size(),
                "the number of form fields shall be reduced by one");
        assertNull(acroForm.getField("AlignLeft-Filled"), "the flattened field shall no longer exist");

        // Store for manual comparison if needed
        // final File OUT_DIR = new File("target/test-output");
        // File file = new File(OUT_DIR, "MultilineFields-SingleFieldFlattened.pdf");
        // document.save(file);
    }
}
/**
 * PDFBOX-5254: flattens the complete form of the sample document, checks that no fields are
 * left and that the first page holds 72 annotations, then compares the rendering of the
 * flattened document with the rendering of the original.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
@Test
void flattenTestPDFBOX5254() throws IOException, URISyntaxException
{
    final String pdfName = "PDFBOX-4889-5254.pdf";
    generateSamples("https://issues.apache.org/jira/secure/attachment/13005793/f1040sb%20test.pdf", pdfName);

    final File original = new File(IN_DIR, pdfName);
    final File flattened = new File(OUT_DIR, pdfName);

    try (PDDocument doc = Loader.loadPDF(original))
    {
        doc.getDocumentCatalog().getAcroForm().flatten();
        doc.setAllSecurityToBeRemoved(true);
        doc.save(flattened);

        PDAcroForm formAfterFlatten = doc.getDocumentCatalog().getAcroForm(null);
        assertTrue(formAfterFlatten.getFields().isEmpty());
        int annotationsOnFirstPage = doc.getPage(0).getAnnotations().size();
        assertEquals(72, annotationsOnFirstPage);
    }

    // compare rendering
    boolean renderingMatches = TestPDFToImage.doTestFile(flattened, IN_DIR.getAbsolutePath(),
            OUT_DIR.getAbsolutePath());
    if (!renderingMatches)
    {
        fail("Rendering of " + flattened + " failed or is not identical to expected rendering in "
                + IN_DIR + " directory");
    }

    // only reached for matching files: cleanup input and output directory
    removeAllRenditions(original);
    original.delete();
    flattened.delete();
}
/**
 * PDFBOX-5225: flattens only the field VN_NAME and checks the number of fields remaining in
 * the field tree and the number of annotations on the first page. That field has an "orphan"
 * widget that belongs to no page.
 *
 * A rendering difference is only reported on stderr and has to be checked manually.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
@Test
void flattenTestPDFBOX5225() throws IOException, URISyntaxException
{
    final String pdfName = "PDFBOX-5225.pdf";
    generateSamples("https://issues.apache.org/jira/secure/attachment/13027311/SourceFailure.pdf", pdfName);

    final File original = new File(IN_DIR, pdfName);
    final File flattened = new File(OUT_DIR, pdfName);

    try (PDDocument doc = Loader.loadPDF(original))
    {
        PDAcroForm form = doc.getDocumentCatalog().getAcroForm();

        List<PDField> fieldsToFlatten = new ArrayList<>();
        fieldsToFlatten.add(form.getField("VN_NAME"));
        form.flatten(fieldsToFlatten, false);

        doc.setAllSecurityToBeRemoved(true);
        doc.save(flattened);

        // walk the whole field tree to count the fields that are left
        int remainingFields = 0;
        for (PDField ignored : form.getFieldTree())
        {
            remainingFields++;
        }
        assertEquals(76, remainingFields);
        assertEquals(59, doc.getPage(0).getAnnotations().size());
    }

    // compare rendering
    boolean renderingMatches = TestPDFToImage.doTestFile(flattened, IN_DIR.getAbsolutePath(),
            OUT_DIR.getAbsolutePath());
    if (renderingMatches)
    {
        // cleanup input and output directory for matching files.
        removeAllRenditions(original);
        original.delete();
        flattened.delete();
    }
    else
    {
        // check manually
        System.err.println("Rendering of " + flattened
                + " failed or is not identical to expected rendering in " + IN_DIR + " directory");
    }
}
/*
 * Download the sample, flatten its complete form and compare the rendering of the result
 * with the image samples generated from the original. The test fails if fields are left
 * after flattening or if the renderings differ; on success the sample, the flattened file
 * and the generated images are removed again.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
private static void flattenAndCompare(String sourceUrl, String targetFileName)
        throws IOException, URISyntaxException
{
    generateSamples(sourceUrl, targetFileName);

    final File original = new File(IN_DIR, targetFileName);
    final File flattened = new File(OUT_DIR, targetFileName);

    try (PDDocument doc = Loader.loadPDF(original))
    {
        doc.getDocumentCatalog().getAcroForm().flatten();
        doc.setAllSecurityToBeRemoved(true);
        List<PDField> remainingFields = doc.getDocumentCatalog().getAcroForm().getFields();
        assertTrue(remainingFields.isEmpty());
        doc.save(flattened);
    }

    // compare rendering
    boolean renderingMatches = TestPDFToImage.doTestFile(flattened, IN_DIR.getAbsolutePath(),
            OUT_DIR.getAbsolutePath());
    if (!renderingMatches)
    {
        fail("Rendering of " + flattened + " failed or is not identical to expected rendering in "
                + IN_DIR + " directory");
    }

    // only reached for matching files: cleanup input and output directory
    removeAllRenditions(original);
    original.delete();
    flattened.delete();
}
/*
 * Download the PDF and render each of its pages to a PNG file in the input directory.
 * These images are the samples the PDF will be compared to after flatten.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
private static void generateSamples(String sourceUrl, String targetFile)
        throws IOException, URISyntaxException
{
    getFromUrl(sourceUrl, targetFile);

    final File pdf = new File(IN_DIR, targetFile);
    try (PDDocument doc = Loader.loadPDF(pdf, (String) null))
    {
        final String pngPrefix = IN_DIR.getAbsolutePath() + '/' + pdf.getName() + "-";
        final int pageCount = doc.getNumberOfPages();
        final PDFRenderer pageRenderer = new PDFRenderer(doc);

        // image files are numbered from 1 while the renderer uses 0-based page indices
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            // 96 is the Windows native DPI
            BufferedImage rendered = pageRenderer.renderImageWithDPI(pageNumber - 1, 96);
            File png = new File(pngPrefix + pageNumber + ".png");
            ImageIO.write(rendered, "PNG", png);
        }
    }
}
/*
 * Download the PDF found at the given URL into the input directory, replacing a file of the
 * same name if one exists.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
private static void getFromUrl(String sourceUrl, String targetFile)
        throws IOException, URISyntaxException
{
    final URI source = new URI(sourceUrl);
    final File destination = new File(IN_DIR, targetFile);

    try (InputStream download = source.toURL().openStream())
    {
        Files.copy(download, destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
    }
}
/*
 * Remove renditions for the PDF from the input directory, i.e. all files in the directory
 * of the given file whose name starts with the name of that file and ends with ".png"
 * (case-insensitive).
 * The output directory will have been cleaned by the TestPDFToImage utility.
 *
 * This is a best-effort cleanup: nothing is done if the directory cannot be listed.
 */
private static void removeAllRenditions(final File inputFile)
{
    File directory = inputFile.getParentFile();
    if (directory == null)
    {
        // a file without a parent path has no directory to scan
        return;
    }

    File[] renditions = directory.listFiles(
            (File dir, String name) ->
                    name.startsWith(inputFile.getName()) && name.toLowerCase().endsWith(".png"));

    // listFiles() returns null if the path is not a directory or an I/O error occurs;
    // streaming over that null array would throw a NullPointerException
    if (renditions == null)
    {
        return;
    }

    for (File rendition : renditions)
    {
        rendition.delete();
    }
}
}