// PDStructureElementTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.junit.jupiter.api.Test;
/**
 * Tests for {@link PDStructureElement}: walking the structure tree of sample documents to
 * collect attributes and class names, and exercising the simple accessors and kid handling
 * of a newly created element.
 *
 * @author Tilman Hausherr
 */
class PDStructureElementTest
{
    private static final File TARGETPDFDIR = new File("target/pdfs");

    /**
     * PDFBOX-4197: test that object references in array attributes of a PDStructureElement
     * are caught.
     *
     * @throws IOException if the sample document cannot be loaded
     */
    @Test
    void testPDFBox4197() throws IOException
    {
        Set<Revisions<PDAttributeObject>> foundAttributes = new HashSet<>();
        Set<String> foundClasses = new HashSet<>();
        File sample = new File(TARGETPDFDIR, "PDFBOX-4197.pdf");
        try (PDDocument document = Loader.loadPDF(sample))
        {
            walkStructureTree(document, foundAttributes, foundClasses);
        }

        // collect attributes and check their count.
        assertEquals(117, foundAttributes.size());
        // this one was 105 before PDFBOX-4197 was fixed
        assertEquals(111, totalAttributeCount(foundAttributes));
        assertEquals(0, foundClasses.size());
    }

    /**
     * Check that all classes are caught and are in the /ClassMap.
     *
     * @throws IOException if the sample document cannot be loaded
     */
    @Test
    void testClassMap() throws IOException
    {
        Set<Revisions<PDAttributeObject>> foundAttributes = new HashSet<>();
        Set<String> foundClasses = new HashSet<>();
        try (PDDocument document = Loader.loadPDF(
                RandomAccessReadBuffer.createBufferFromStream(
                        PDStructureElementTest.class.getResourceAsStream("PDFBOX-2725-878725.pdf"))))
        {
            walkStructureTree(document, foundAttributes, foundClasses);
        }

        // collect attributes and check their count.
        assertEquals(72, foundAttributes.size());
        assertEquals(45, totalAttributeCount(foundAttributes));
        assertEquals(10, foundClasses.size());
    }

    /**
     * Starts the recursive check at the /K entry of the document's structure tree root,
     * validating class names against the root's class map.
     *
     * @param document the loaded document whose structure tree is walked
     * @param attributeSet receives the attribute revisions of every visited structure element
     * @param classSet receives every class name that was checked against the class map
     */
    private void walkStructureTree(PDDocument document,
            Set<Revisions<PDAttributeObject>> attributeSet, Set<String> classSet)
    {
        PDStructureTreeRoot treeRoot = document.getDocumentCatalog().getStructureTreeRoot();
        COSBase rootKids = treeRoot.getK();
        Map<String, Object> classMap = treeRoot.getClassMap();
        checkElement(rootKids, attributeSet, classMap, classSet);
    }

    /**
     * Adds up the sizes of all collected attribute revision lists.
     *
     * @param attributeSet the collected attribute revisions
     * @return the sum of {@code size()} over every entry of the set
     */
    private static int totalAttributeCount(Set<Revisions<PDAttributeObject>> attributeSet)
    {
        int total = 0;
        for (Revisions<PDAttributeObject> revisions : attributeSet)
        {
            total += revisions.size();
        }
        return total;
    }

    /**
     * Recursively visits a /K value. Each element can be an array, a dictionary or a number.
     * See PDF specification Table 323 - Entries in a structure element dictionary.
     *
     * @param base the /K value (or array entry) to inspect
     * @param attributeSet receives the attribute revisions of every visited structure element
     * @param classMap the /ClassMap of the structure tree root
     * @param classSet receives every class name that was checked against the class map
     */
    private void checkElement(COSBase base, Set<Revisions<PDAttributeObject>> attributeSet,
            Map<String, Object> classMap, Set<String> classSet)
    {
        if (base instanceof COSArray)
        {
            for (COSBase entry : (COSArray) base)
            {
                // array entries may be indirect references; resolve them before descending
                COSBase resolved = entry instanceof COSObject
                        ? ((COSObject) entry).getObject()
                        : entry;
                checkElement(resolved, attributeSet, classMap, classSet);
            }
            return;
        }
        if (!(base instanceof COSDictionary))
        {
            // anything that is neither an array nor a dictionary is ignored
            return;
        }

        COSDictionary dictionary = (COSDictionary) base;
        if (dictionary.containsKey(COSName.PG))
        {
            collectFromElement(dictionary, attributeSet, classMap, classSet);
        }
        if (dictionary.containsKey(COSName.K))
        {
            checkElement(dictionary.getDictionaryObject(COSName.K), attributeSet, classMap, classSet);
        }
    }

    /**
     * Wraps the dictionary as a structure element, records its attributes and, when only
     * /C (and no /A) is present, records its class names and asserts each is in the class map.
     *
     * @param dictionary a dictionary that contains a /Pg entry
     * @param attributeSet receives the element's attribute revisions
     * @param classMap the /ClassMap of the structure tree root
     * @param classSet receives every class name that was checked against the class map
     */
    private void collectFromElement(COSDictionary dictionary,
            Set<Revisions<PDAttributeObject>> attributeSet,
            Map<String, Object> classMap, Set<String> classSet)
    {
        PDStructureElement element = new PDStructureElement(dictionary);
        attributeSet.add(element.getAttributes());
        Revisions<String> classNames = element.getClassNames();

        // "If both the A and C entries are present and a given attribute is specified by both,
        // the one specified by the A entry shall take precedence."
        if (!dictionary.containsKey(COSName.C) || dictionary.containsKey(COSName.A))
        {
            return;
        }
        int index = 0;
        while (index < classNames.size())
        {
            String className = classNames.getObject(index);
            classSet.add(className);
            assertTrue(classMap.containsKey(className), "'" + className + "' not in ClassMap " + classMap);
            ++index;
        }
    }

    /**
     * Exercises the plain accessors of a new structure element and the different ways of
     * appending kids, including the rejection of negative values.
     */
    @Test
    void testSimple()
    {
        PDStructureElement element = new PDStructureElement("S", null);

        // state right after construction
        assertEquals(PDStructureElement.TYPE, element.getType());
        assertEquals("S", element.getStructureType());
        assertNull(element.getParent());

        // simple setter / getter round trips
        element.setStructureType("T");
        assertEquals("T", element.getStructureType());
        element.setElementIdentifier("Ident");
        assertEquals("Ident", element.getElementIdentifier());

        // revision number: set, increment, and a negative value is expected to be refused
        element.setRevisionNumber(33);
        assertEquals(33, element.getRevisionNumber());
        element.incrementRevisionNumber();
        assertEquals(34, element.getRevisionNumber());
        assertThrows(IllegalArgumentException.class, () -> element.setRevisionNumber(-1));

        element.setTitle("Title");
        assertEquals("Title", element.getTitle());
        element.setLanguage("Klingon");
        assertEquals("Klingon", element.getLanguage());
        element.setAlternateDescription("Alto");
        assertEquals("Alto", element.getAlternateDescription());
        element.setActualText("Actual");
        assertEquals("Actual", element.getActualText());
        element.setExpandedForm("ExpF");
        assertEquals("ExpF", element.getExpandedForm());

        // kids: a plain MCID, a marked content reference, and a marked content object
        assertThrows(IllegalArgumentException.class, () -> element.appendKid(-1));
        element.appendKid(0);
        element.appendKid(newReference(1));
        PDMarkedContentReference secondReference = newReference(2);
        PDMarkedContent markedContent =
                PDMarkedContent.create(COSName.S, secondReference.getCOSObject());
        element.appendKid(markedContent);

        // a negative MCID is expected to be refused by the setter, so it is written directly
        // into the dictionary to check that appending such marked content is refused as well
        PDMarkedContentReference negativeReference = new PDMarkedContentReference();
        assertThrows(IllegalArgumentException.class, () -> negativeReference.setMCID(-1));
        negativeReference.getCOSObject().setInt(COSName.MCID, -1);
        PDMarkedContent negativeContent =
                PDMarkedContent.create(COSName.S, negativeReference.getCOSObject());
        assertThrows(IllegalArgumentException.class, () -> element.appendKid(negativeContent));

        // only the three valid kids are expected to be present, in insertion order
        List<Object> kids = element.getKids();
        assertEquals(3, kids.size());
        assertEquals(0, kids.get(0));
        PDMarkedContentReference storedReference = (PDMarkedContentReference) kids.get(1);
        assertEquals(PDMarkedContentReference.TYPE,
                storedReference.getCOSObject().getNameAsString(COSName.TYPE));
        assertEquals(1, storedReference.getMCID());
        assertEquals(2, kids.get(2));
    }

    /**
     * Creates a marked content reference carrying the given MCID.
     *
     * @param mcid the marked content identifier to set
     * @return the new reference
     */
    private static PDMarkedContentReference newReference(int mcid)
    {
        PDMarkedContentReference reference = new PDMarkedContentReference();
        reference.setMCID(mcid);
        return reference;
    }
}