// PDStructureElementTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
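/**
 * Tests for {@link PDStructureElement}: walks the structure tree of sample documents, collects
 * the attributes and class names of the structure elements found, and checks the resulting counts.
 *
 * @author Tilman Hausherr
 */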
class PDStructureElementTest
{
    private static final File TARGETPDFDIR = new File("target/pdfs");

    /**
     * PDFBOX-4197: verifies that object references inside array attributes of a
     * PDStructureElement are picked up.
     *
     * @throws IOException if the sample document cannot be loaded.
     */
    @Test
    void testPDFBox4197() throws IOException
    {
        Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<>();
        Set<String> classSet = new HashSet<>();

        File pdfFile = new File(TARGETPDFDIR, "PDFBOX-4197.pdf");
        try (PDDocument doc = Loader.loadPDF(pdfFile))
        {
            collectFromStructureTree(doc, attributeSet, classSet);
        }

        // check the number of collected attribute revisions and of the attributes they hold.
        assertEquals(117, attributeSet.size());
        assertEquals(111, totalRevisionCount(attributeSet)); // this one was 105 before PDFBOX-4197 was fixed
        assertEquals(0, classSet.size());
    }

    /**
     * Verifies that all class names are found and that each of them is a key of the /ClassMap.
     *
     * @throws IOException if the sample document cannot be loaded.
     */
    @Test
    void testClassMap() throws IOException
    {
        Set<Revisions<PDAttributeObject>> attributeSet = new HashSet<>();
        Set<String> classSet = new HashSet<>();

        try (PDDocument doc = Loader.loadPDF(
                RandomAccessReadBuffer.createBufferFromStream(
                        PDStructureElementTest.class.getResourceAsStream("PDFBOX-2725-878725.pdf"))))
        {
            collectFromStructureTree(doc, attributeSet, classSet);
        }

        // check the number of collected attribute revisions and of the attributes they hold.
        assertEquals(72, attributeSet.size());
        assertEquals(45, totalRevisionCount(attributeSet));
        assertEquals(10, classSet.size());
    }

    /**
     * Starts the traversal at the /K entry of the document's structure tree root, using the
     * root's class map for the class name checks.
     *
     * @param doc the loaded document.
     * @param attributeSet receives the attribute revisions of every visited structure element.
     * @param classSet receives the class names that were checked against the class map.
     */
    private void collectFromStructureTree(PDDocument doc,
            Set<Revisions<PDAttributeObject>> attributeSet, Set<String> classSet)
    {
        PDStructureTreeRoot treeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
        checkElement(treeRoot.getK(), attributeSet, treeRoot.getClassMap(), classSet);
    }

    /**
     * Adds up {@code size()} of all collected revisions.
     *
     * @param attributeSet the collected attribute revisions.
     * @return the sum of the sizes.
     */
    private static int totalRevisionCount(Set<Revisions<PDAttributeObject>> attributeSet)
    {
        return attributeSet.stream().mapToInt(Revisions::size).sum();
    }

    /**
     * Recursively visits a /K entry. Each element can be an array, a dictionary or a number;
     * see PDF specification Table 323 - Entries in a structure element dictionary. Arrays are
     * visited entry by entry, dictionaries are inspected and descended into through their own
     * /K entry, and anything else is ignored.
     *
     * @param base the element to visit.
     * @param attributeSet receives the attribute revisions of each dictionary that has a
     * {@code COSName.PG} entry.
     * @param classMap the class map in which every collected class name must be a key.
     * @param classSet receives the class names that were checked against the class map.
     */
    private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>> attributeSet,
            Map<String, Object> classMap, Set<String> classSet)
    {
        if (base instanceof COSArray)
        {
            for (COSBase entry : (COSArray) base)
            {
                // array entries may be indirect references, resolve them before descending
                COSBase resolved = entry instanceof COSObject
                        ? ((COSObject) entry).getObject()
                        : entry;
                checkElement(resolved, attributeSet, classMap, classSet);
            }
            return;
        }
        if (!(base instanceof COSDictionary))
        {
            return;
        }

        COSDictionary dict = (COSDictionary) base;
        if (dict.containsKey(COSName.PG))
        {
            PDStructureElement element = new PDStructureElement(dict);
            attributeSet.add(element.getAttributes());
            Revisions<String> classNames = element.getClassNames();

            // "If both the A and C entries are present and a given attribute is specified by both,
            // the one specified by the A entry shall take precedence."
            // Hence class names are only looked at when there is a /C entry but no /A entry.
            boolean classesOnly = dict.containsKey(COSName.C) && !dict.containsKey(COSName.A);
            if (classesOnly)
            {
                checkClassNames(classNames, classMap, classSet);
            }
        }
        if (dict.containsKey(COSName.K))
        {
            checkElement(dict.getDictionaryObject(COSName.K), attributeSet, classMap, classSet);
        }
    }

    /**
     * Records every class name and asserts that it is a key of the class map.
     *
     * @param classNames the class names of one structure element.
     * @param classMap the class map that must contain each name.
     * @param classSet receives the class names.
     */
    private void checkClassNames(Revisions<String> classNames, Map<String, Object> classMap,
            Set<String> classSet)
    {
        int count = classNames.size();
        for (int idx = 0; idx < count; ++idx)
        {
            String name = classNames.getObject(idx);
            classSet.add(name);
            Assertions.assertTrue(classMap.containsKey(name), "'" + name + "' not in ClassMap " + classMap);
        }
    }
}