PDFCloneUtility.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.multipdf;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
/**
* Utility class used to clone PDF objects. It keeps track of objects it has already cloned.
*
*/
public class PDFCloneUtility
{
private static final Logger LOG = LogManager.getLogger(PDFCloneUtility.class);
private final PDDocument destination;
private final Map<COSBase, COSBase> clonedVersion = new HashMap<>();
private final Set<COSBase> clonedValues = new HashSet<>();
// It might be useful to use IdentityHashMap like in PDFBOX-4477 for speed,
// but we need a really huge file to test this. A test with the file from PDFBOX-4477
// did not show a noticeable speed difference.
/**
* Creates a new instance for the given target document.
*
* @param dest the destination PDF document that will receive the clones
*/
protected PDFCloneUtility(PDDocument dest)
{
this.destination = dest;
}
/**
* Returns the destination PDF document this cloner instance is set up for.
*
* @return the destination PDF document
*/
PDDocument getDestination()
{
return this.destination;
}
/**
* Deep-clones the given object for inclusion into a different PDF document identified by the destination parameter.
*
* Expert use only, don���t use it if you don���t know exactly what you are doing.
*
* @param base the initial object as the root of the deep-clone operation
* @return the cloned instance of the base object
* @throws IOException if an I/O error occurs
*/
@SuppressWarnings("unchecked")
public <TCOSBase extends COSBase> TCOSBase cloneForNewDocument(TCOSBase base) throws IOException
{
if (base == null)
{
return null;
}
COSBase retval = clonedVersion.get(base);
if (retval != null)
{
// we are done, it has already been converted.
return (TCOSBase) retval;
}
if (clonedValues.contains(base))
{
// Don't clone a clone
return base;
}
retval = cloneCOSBaseForNewDocument(base);
clonedVersion.put(base, retval);
clonedValues.add(retval);
return (TCOSBase) retval;
}
private COSBase cloneCOSBaseForNewDocument(COSBase base) throws IOException
{
if (base instanceof COSObject)
{
return cloneForNewDocument(((COSObject) base).getObject());
}
if (base instanceof COSArray)
{
return cloneCOSArray((COSArray) base);
}
if (base instanceof COSStream)
{
return cloneCOSStream((COSStream) base);
}
if (base instanceof COSDictionary)
{
return cloneCOSDictionary((COSDictionary) base);
}
return base;
}
private COSArray cloneCOSArray(COSArray array) throws IOException
{
COSArray newArray = new COSArray();
for (int i = 0; i < array.size(); i++)
{
COSBase value = array.get(i);
if (hasSelfReference(array, value))
{
newArray.add(newArray);
}
else
{
newArray.add(cloneForNewDocument(value));
}
}
return newArray;
}
private COSStream cloneCOSStream(COSStream stream) throws IOException
{
COSStream newStream = destination.getDocument().createCOSStream();
try (OutputStream output = newStream.createRawOutputStream();
InputStream input = stream.createRawInputStream())
{
input.transferTo(output);
}
clonedVersion.put(stream, newStream);
for (Map.Entry<COSName, COSBase> entry : stream.entrySet())
{
COSBase value = entry.getValue();
if (hasSelfReference(stream, value))
{
newStream.setItem(entry.getKey(), newStream);
}
else
{
newStream.setItem(entry.getKey(), cloneForNewDocument(value));
}
}
return newStream;
}
private COSDictionary cloneCOSDictionary(COSDictionary dictionary) throws IOException
{
COSDictionary newDictionary = new COSDictionary();
clonedVersion.put(dictionary, newDictionary);
for (Map.Entry<COSName, COSBase> entry : dictionary.entrySet())
{
COSBase value = entry.getValue();
if (hasSelfReference(dictionary, value))
{
newDictionary.setItem(entry.getKey(), newDictionary);
}
else
{
newDictionary.setItem(entry.getKey(), cloneForNewDocument(value));
}
}
return newDictionary;
}
/**
* Merges two objects of the same type by deep-cloning its members. <br>
* Base and target must be instances of the same class.
*
* @param base the base object to be cloned
* @param target the merge target
* @throws IOException if an I/O error occurs
*/
void cloneMerge(final COSObjectable base, COSObjectable target) throws IOException
{
if (base == null || base == target)
{
return;
}
cloneMergeCOSBase(base.getCOSObject(), target.getCOSObject());
}
private void cloneMergeCOSBase(final COSBase source, final COSBase target) throws IOException
{
COSBase sourceBase = source instanceof COSObject ? ((COSObject) source).getObject()
: source;
COSBase targetBase = target instanceof COSObject ? ((COSObject) target).getObject()
: target;
if (sourceBase instanceof COSArray && targetBase instanceof COSArray)
{
COSArray array = (COSArray) sourceBase;
for (int i = 0; i < array.size(); i++)
{
((COSArray) targetBase).add(cloneForNewDocument(array.get(i)));
}
}
else if (sourceBase instanceof COSDictionary && targetBase instanceof COSDictionary)
{
COSDictionary sourceDict = (COSDictionary) sourceBase;
COSDictionary targetDict = (COSDictionary) targetBase;
for (Map.Entry<COSName, COSBase> entry : sourceDict.entrySet())
{
COSName key = entry.getKey();
COSBase value = entry.getValue();
if (targetDict.getItem(key) != null)
{
cloneMerge(value, targetDict.getItem(key));
}
else
{
targetDict.setItem(key, cloneForNewDocument(value));
}
}
}
}
/**
* Check whether an element (of an array or a dictionary) points to its parent.
*
* @param parent COSArray or COSDictionary
* @param value an element
*/
private boolean hasSelfReference(COSBase parent, COSBase value)
{
if (value instanceof COSObject)
{
COSBase actual = ((COSObject) value).getObject();
if (actual == parent)
{
COSObject cosObj = ((COSObject) value);
LOG.warn("{} object has a reference to itself: {}",
parent.getClass().getSimpleName(), cosObj.getKey());
return true;
}
}
return false;
}
}