// DeserializationTest.java
/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
package org.apache.xmpbox.parser;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.TimeZone;
import javax.xml.transform.TransformerException;
import org.apache.xmpbox.DateConverter;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
import org.apache.xmpbox.schema.XMPSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.type.ThumbnailType;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.apache.xmpbox.xml.XmpParsingException.ErrorType;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.apache.xmpbox.xml.XmpSerializer;
/**
 * Round-trip tests for XMP parsing and serialization.
 *
 * DomXmpParser imports the XML into an internal representation and XmpSerializer exports that
 * representation back into XML. The exported XML may look different from the input, but it should
 * be the same from a data point of view. Most tests therefore parse a document, serialize it,
 * compare a SHA-256 digest of the serialized output against a recorded value and finally parse the
 * serialized output again.
 *
 * @author Tilman Hausherr
 */
class DeserializationTest
{
    /** Time zone that was the JVM default before the tests of this class started. */
    private static TimeZone defaultTZ;

    /** Receives the serialized XMP; recreated before each test. */
    private ByteArrayOutputStream baos;

    /** Serializer under test; recreated before each test. */
    private XmpSerializer serializer;

    /** Parser under test; recreated before each test. */
    private DomXmpParser xdb;

    /**
     * Remembers the default time zone and switches the JVM to UTC for the duration of the tests.
     */
    @BeforeAll
    static void initAll()
    {
        defaultTZ = TimeZone.getDefault();
        // Need to set a timezone or date values will be different depending on test location
        TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
    }

    /**
     * Creates a fresh output buffer, serializer and parser for every test.
     *
     * @throws XmpParsingException if the parser cannot be created
     */
    @BeforeEach
    void init() throws XmpParsingException
    {
        baos = new ByteArrayOutputStream();
        serializer = new XmpSerializer();
        xdb = new DomXmpParser();
    }

    /**
     * Restores the default time zone saved by {@link #initAll()}.
     */
    @AfterAll
    static void finishAll()
    {
        TimeZone.setDefault(defaultTZ);
    }

    /**
     * Parses and round-trips a document with recursively structured properties.
     */
    @Test
    void testStructuredRecursive() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        // not valid XMP according to https://www.pdflib.com/pdf-knowledge-base/xmp/free-xmp-validator/
        XMPMetadata metadata = parseResource("/org/apache/xmpbox/parser/structured_recursive.xml");
        checkTransform(metadata, "62495942572014793625872774972947435765670563107818217447706375288846297812281");
    }

    /**
     * Parses and round-trips the {@code empty_list.xml} fixture.
     */
    @Test
    void testEmptyLi() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/org/apache/xmpbox/parser/empty_list.xml");
        checkTransform(metadata, "95754993383010030299848397520773287413798669761891751126809013411187892693280");
    }

    /**
     * Parses the {@code emptyli.xml} fixture, reads the Dublin Core creators property and
     * round-trips the document.
     */
    @Test
    void testEmptyLi2() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/validxmp/emptyli.xml");
        DublinCoreSchema dc = metadata.getDublinCoreSchema();
        // the result is not inspected; the call itself must complete without an exception
        dc.getCreatorsProperty();
        checkTransform(metadata, "39450703080437563739186076111811684356424147071014681699119272065568305393521");
    }

    /**
     * Checks the Dublin Core title returned for a {@code null} language.
     */
    @Test
    void testGetTitle() throws XmpParsingException, BadFieldValueException, IOException
    {
        XMPMetadata metadata = parseResource("/validxmp/emptyli.xml");
        DublinCoreSchema dc = metadata.getDublinCoreSchema();
        assertEquals("title value", dc.getTitle(null));
    }

    /**
     * Parses and round-trips the {@code AltBagSeqTest.xml} fixture.
     */
    @Test
    void testAltBagSeq() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/org/apache/xmpbox/parser/AltBagSeqTest.xml");
        checkTransform(metadata, "89123270336154452745819041017446278583816329940574853160909598044560152910018");
    }

    /**
     * Checks document id, dates and thumbnails of the {@code ThumbisartorStyle.xml} fixture and
     * round-trips the document.
     */
    @Test
    void testIsartorStyleWithThumbs()
            throws XmpParsingException, BadFieldValueException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/org/apache/xmpbox/parser/ThumbisartorStyle.xml");

        // <xmpMM:DocumentID>
        XMPMediaManagementSchema mediaManagement = metadata.getXMPMediaManagementSchema();
        assertEquals("uuid:09C78666-2F91-3A9C-92AF-3691A6D594F7", mediaManagement.getDocumentID());

        // <xmp:CreateDate>
        // <xmp:ModifyDate>
        // <xmp:MetadataDate>
        XMPBasicSchema basic = metadata.getXMPBasicSchema();
        assertEquals(DateConverter.toCalendar("2008-01-18T16:59:54+01:00"), basic.getCreateDate());
        assertEquals(DateConverter.toCalendar("2008-01-18T16:59:54+01:00"), basic.getModifyDate());
        assertEquals(DateConverter.toCalendar("2008-01-18T16:59:54+01:00"), basic.getMetadataDate());

        // THUMBNAILS TEST
        List<ThumbnailType> thumbs = metadata.getXMPBasicSchema().getThumbnailsProperty();
        assertNotNull(thumbs);
        assertEquals(2, thumbs.size());
        assertIsartorThumbnail(thumbs.get(0));
        assertIsartorThumbnail(thumbs.get(1));

        checkTransform(metadata, "64755266855514150823517184659364700851455308334441170957883187622624192802093");
    }

    /**
     * A document without an initial xpacket must be rejected with {@code XpacketBadStart}.
     */
    @Test
    void testWithNoXPacketStart() throws IOException
    {
        assertParseFails("/invalidxmp/noxpacket.xml", ErrorType.XpacketBadStart);
    }

    /**
     * A document without a final xpacket must be rejected with {@code XpacketBadEnd}.
     */
    @Test
    void testWithNoXPacketEnd() throws IOException
    {
        assertParseFails("/invalidxmp/noxpacketend.xml", ErrorType.XpacketBadEnd);
    }

    /**
     * The {@code noroot.xml} fixture must be rejected with {@code Format}.
     */
    @Test
    void testWithNoRDFElement() throws IOException
    {
        assertParseFails("/invalidxmp/noroot.xml", ErrorType.Format);
    }

    /**
     * The {@code tworoot.xml} fixture must be rejected with {@code Format}.
     */
    @Test
    void testWithTwoRDFElement() throws IOException
    {
        assertParseFails("/invalidxmp/tworoot.xml", ErrorType.Format);
    }

    /**
     * The {@code invalidroot2.xml} fixture must be rejected with {@code Format}.
     */
    @Test
    void testWithInvalidRDFElementPrefix() throws IOException
    {
        assertParseFails("/invalidxmp/invalidroot2.xml", ErrorType.Format);
    }

    /**
     * The {@code invalidroot.xml} fixture must be rejected with {@code Format}.
     */
    @Test
    void testWithRDFRootAsText() throws IOException
    {
        assertParseFails("/invalidxmp/invalidroot.xml", ErrorType.Format);
    }

    /**
     * The {@code undefinedschema.xml} fixture must be rejected with {@code NoSchema}.
     */
    @Test
    void testUndefinedSchema() throws IOException
    {
        assertParseFails("/invalidxmp/undefinedschema.xml", ErrorType.NoSchema);
    }

    /**
     * The {@code undefinedpropertyindefinedschema.xml} fixture must be rejected with {@code NoType}.
     */
    @Test
    void testUndefinedPropertyWithDefinedSchema() throws IOException
    {
        assertParseFails("/invalidxmp/undefinedpropertyindefinedschema.xml", ErrorType.NoType);
    }

    /**
     * The {@code undefinedstructuredindefinedschema.xml} fixture must be rejected with
     * {@code NoValueType}.
     */
    @Test
    void testUndefinedStructuredWithDefinedSchema() throws IOException
    {
        assertParseFails("/invalidxmp/undefinedstructuredindefinedschema.xml", ErrorType.NoValueType);
    }

    /**
     * Every schema of the parsed document must expose a non-null "about" attribute.
     */
    @Test
    void testRdfAboutFound() throws XmpParsingException, IOException
    {
        XMPMetadata metadata = parseResource("/validxmp/emptyli.xml");
        List<XMPSchema> schemas = metadata.getAllSchemas();
        // without this guard the loop below would pass vacuously on an empty schema list
        if (schemas.isEmpty())
        {
            fail("Expected at least one schema in /validxmp/emptyli.xml");
        }
        for (XMPSchema xmpSchema : schemas)
        {
            assertNotNull(xmpSchema.getAboutAttribute());
        }
    }

    /**
     * Checks values of several schemas in the {@code attr_as_props.xml} fixture and round-trips
     * the document.
     */
    @Test
    void testWithAttributesAsProperties() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/validxmp/attr_as_props.xml");

        AdobePDFSchema pdf = metadata.getAdobePDFSchema();
        assertEquals("GPL Ghostscript 8.64", pdf.getProducer());

        DublinCoreSchema dc = metadata.getDublinCoreSchema();
        assertEquals("application/pdf", dc.getFormat());

        XMPBasicSchema basic = metadata.getXMPBasicSchema();
        assertNotNull(basic.getCreateDate());

        PDFAIdentificationSchema pdfaIdentificationSchema = metadata.getPDFAIdentificationSchema();
        assertEquals("B", pdfaIdentificationSchema.getConformance());
        assertEquals(1, pdfaIdentificationSchema.getPart());

        XMPMediaManagementSchema xmpMediaManagementSchema = metadata.getXMPMediaManagementSchema();
        assertEquals("e7127190-445c-11ea-0000-b3bc74086807", xmpMediaManagementSchema.getDocumentID());

        checkTransform(metadata, "27499224985683016678197540524065114038595582230834506941950503218519476041225");
    }

    /**
     * Checks values with spaces at start or end; in this case the value should not be trimmed.
     */
    @Test
    void testSpaceTextValues() throws XmpParsingException, TransformerException, NoSuchAlgorithmException, IOException
    {
        XMPMetadata metadata = parseResource("/validxmp/only_space_fields.xmp");
        // check producer
        assertEquals(" ", metadata.getAdobePDFSchema().getProducer());
        // check creator tool
        assertEquals("Canon ", metadata.getXMPBasicSchema().getCreatorTool());
        checkTransform(metadata, "9220923061800113567693538810355030344095407871190202111473587642358933618073");
    }

    /**
     * Builds metadata programmatically (Dublin Core and Adobe PDF schemas) and round-trips it.
     */
    @Test
    void testMetadataParsing() throws TransformerException, NoSuchAlgorithmException, XmpParsingException
    {
        XMPMetadata metadata = XMPMetadata.createXMPMetadata();

        DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema();
        dc.setCoverage("coverage");
        dc.addContributor("contributor1");
        dc.addContributor("contributor2");
        dc.addDescription("x-default", "Description");

        AdobePDFSchema pdf = metadata.createAndAddAdobePDFSchema();
        pdf.setProducer("Producer");
        pdf.setPDFVersion("1.4");

        checkTransform(metadata, "24727341753942351260821151680330022244742411666459385225917195999704816908515");
    }

    /**
     * PDFBOX-6029: serializing an empty date property used to cause a NullPointerException.
     *
     * @throws XmpParsingException if the XMP cannot be parsed
     * @throws TransformerException if the metadata cannot be serialized
     * @throws NoSuchAlgorithmException if SHA-256 is not available
     */
    @Test
    void testEmptyDate() throws XmpParsingException, TransformerException, NoSuchAlgorithmException
    {
        // NOTE(review): the "begin" value below shows up as replacement characters; it is
        // presumably meant to be the byte order mark U+FEFF. Confirm the source file encoding
        // before changing it, since the expected digest below may depend on this value.
        String xmpmeta = "<?xpacket begin=\"���\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
                + "<x:xmpmeta x:xmptk=\"Adobe XMP Core 4.2.1-c041 52.342996, 2008/05/07-20:48:00\" xmlns:x=\"adobe:ns:meta/\">\n"
                + " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
                + " <rdf:Description rdf:about=\"\" xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n"
                + " <xmp:CreateDate></xmp:CreateDate>\n"
                + " </rdf:Description>\n"
                + " </rdf:RDF>\n"
                + "</x:xmpmeta>\n"
                + "<?xpacket end=\"w\"?>";
        XMPMetadata metadata = xdb.parse(xmpmeta.getBytes(StandardCharsets.UTF_8));
        checkTransform(metadata, "19030153876683461724958694183980892665426846590791273142114566290124997390122");
    }

    /**
     * Loads a classpath resource relative to {@link DomXmpParser} and parses it.
     *
     * Fails the test with an explicit message when the resource does not exist, instead of
     * handing a {@code null} stream to the parser.
     *
     * @param resourcePath absolute classpath location of the XMP file
     * @return the parsed metadata
     * @throws XmpParsingException if the parser rejects the document
     * @throws IOException if closing the stream fails
     */
    private XMPMetadata parseResource(String resourcePath) throws XmpParsingException, IOException
    {
        try (InputStream is = DomXmpParser.class.getResourceAsStream(resourcePath))
        {
            assertNotNull(is, "Test resource not found: " + resourcePath);
            return xdb.parse(is);
        }
    }

    /**
     * Asserts that parsing the given resource fails with the expected error type.
     *
     * @param resourcePath absolute classpath location of the invalid XMP file
     * @param expectedErrorType error type the thrown {@link XmpParsingException} must carry
     * @throws IOException if closing the stream fails
     */
    private void assertParseFails(String resourcePath, ErrorType expectedErrorType) throws IOException
    {
        try
        {
            parseResource(resourcePath);
            // fail() raises an assertion error, which the catch clause below does not intercept
            fail("Should fail during parse");
        }
        catch (XmpParsingException e)
        {
            assertEquals(expectedErrorType, e.getErrorType());
        }
    }

    /**
     * Asserts the values shared by both thumbnails of the {@code ThumbisartorStyle.xml} fixture.
     *
     * @param thumb thumbnail to check
     */
    private void assertIsartorThumbnail(ThumbnailType thumb)
    {
        assertEquals(Integer.valueOf(162), thumb.getHeight());
        assertEquals(Integer.valueOf(216), thumb.getWidth());
        assertEquals("JPEG", thumb.getFormat());
        assertEquals("/9j/4AAQSkZJRgABAgEASABIAAD", thumb.getImage());
    }

    /**
     * Round-trip check that expects the re-parsed document to have as many schemas as
     * {@code metadata} has before serialization.
     *
     * @param metadata metadata to serialize
     * @param expected expected SHA-256 digest of the serialized output, as a decimal number
     * @throws TransformerException if serialization fails
     * @throws NoSuchAlgorithmException if SHA-256 is not available
     * @throws XmpParsingException if the serialized output cannot be parsed again
     */
    private void checkTransform(XMPMetadata metadata, String expected)
            throws TransformerException, NoSuchAlgorithmException, XmpParsingException
    {
        checkTransform(metadata, expected, metadata.getAllSchemas().size());
    }

    /**
     * Serializes the metadata, compares the SHA-256 digest of the output with the expected value
     * and parses the output again to verify the schema count.
     *
     * @param metadata metadata to serialize
     * @param expected expected SHA-256 digest of the serialized output, as a decimal number
     * @param expectedSchemaCount number of schemas the re-parsed document must contain
     * @throws TransformerException if serialization fails
     * @throws NoSuchAlgorithmException if SHA-256 is not available
     * @throws XmpParsingException if the serialized output cannot be parsed again
     */
    private void checkTransform(XMPMetadata metadata, String expected, int expectedSchemaCount)
            throws TransformerException, NoSuchAlgorithmException, XmpParsingException
    {
        // start from an empty buffer so a repeated call never hashes concatenated documents
        baos.reset();
        serializer.serialize(metadata, baos, true);

        // normalize line endings so the digest does not depend on the platform line separator
        String replaced = baos.toString(StandardCharsets.UTF_8).replace("\r\n", "\n");
        byte[] digest = MessageDigest.getInstance("SHA-256").digest(replaced.getBytes(StandardCharsets.UTF_8));
        String result = new BigInteger(1, digest).toString();
        assertEquals(expected, result, "output:\n" + replaced);

        // tests round trip
        XMPMetadata xmp = xdb.parse(baos.toByteArray());
        assertEquals(expectedSchemaCount, xmp.getAllSchemas().size());
    }
}