XMPReader.java
/*
* Copyright (c) 2009, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.imageio.metadata.xmp;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.IIOException;
import javax.imageio.stream.ImageInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.twelvemonkeys.imageio.metadata.Directory;
import com.twelvemonkeys.imageio.metadata.Entry;
import com.twelvemonkeys.imageio.metadata.MetadataReader;
import com.twelvemonkeys.imageio.util.IIOUtil;
import com.twelvemonkeys.lang.Validate;
/**
* XMPReader
*
* @author <a href="mailto:harald.kuhr@gmail.com">Harald Kuhr</a>
* @author last modified by $Author: haraldk$
* @version $Id: XMPReader.java,v 1.0 Nov 14, 2009 11:04:30 PM haraldk Exp$
*/
public final class XMPReader extends MetadataReader {
// See http://www.scribd.com/doc/56852716/XMPSpecificationPart1
// TODO: Types? Probably defined in XMP/RDF XML schema. Or are we happy that everything is a string?
@Override
public Directory read(final ImageInputStream input) throws IOException {
Validate.notNull(input, "input");
try {
DocumentBuilderFactory factory = createDocumentBuilderFactory();
// TODO: Consider parsing using SAX?
// TODO: Determine encoding and parse using a Reader...
// TODO: Refactor scanner to return inputstream?
// TODO: Be smarter about ASCII-NULL termination/padding (the SAXParser aka Xerces DOMParser doesn't like it)...
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(new DefaultHandler());
Document document = builder.parse(new InputSource(IIOUtil.createStreamAdapter(input)));
String toolkit = getToolkit(document);
Node rdfRoot = document.getElementsByTagNameNS(XMP.NS_RDF, "RDF").item(0);
NodeList descriptions = document.getElementsByTagNameNS(XMP.NS_RDF, "Description");
return parseDirectories(rdfRoot, descriptions, toolkit);
}
catch (SAXException e) {
throw new IIOException(e.getMessage(), e);
}
catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
private DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
// Security: Disable XInclude & expanding entity references ("bombs"), not needed for XMP
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
// Security: Enable "secure processing", to prevent DoS attacks
factory.setAttribute(XMLConstants.FEATURE_SECURE_PROCESSING, true);
// Security: Remove possibility to access external DTDs or Schema, not needed for XMP
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
// Security: Disable loading of external DTD and entities, not needed for XMP
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
return factory;
}
private String getToolkit(Document document) {
NodeList xmpmeta = document.getElementsByTagNameNS(XMP.NS_X, "xmpmeta");
if (xmpmeta == null || xmpmeta.getLength() <= 0) {
return null;
}
Node toolkit = xmpmeta.item(0).getAttributes().getNamedItemNS(XMP.NS_X, "xmptk");
return toolkit != null ? toolkit.getNodeValue() : null;
}
private XMPDirectory parseDirectories(final Node pParentNode, NodeList pNodes, String toolkit) {
Map<String, List<Entry>> subdirs = new LinkedHashMap<>();
for (Node desc : asIterable(pNodes)) {
if (desc.getParentNode() != pParentNode) {
continue;
}
// Support attribute short-hand syntax
parseAttributesForKnownElements(subdirs, desc);
for (Node node : asIterable(desc.getChildNodes())) {
if (node.getNodeType() != Node.ELEMENT_NODE) {
continue;
}
// Lookup
List<Entry> dir = subdirs.get(node.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<>();
subdirs.put(node.getNamespaceURI(), dir);
}
Object value;
if (isResourceType(node)) {
value = parseAsResource(node);
}
else {
// TODO: This method contains loads of duplication an should be cleaned up...
// Support attribute short-hand syntax
Map<String, List<Entry>> subsubdirs = new LinkedHashMap<>();
parseAttributesForKnownElements(subsubdirs, node);
if (!subsubdirs.isEmpty()) {
List<Entry> entries = new ArrayList<>(subsubdirs.size());
for (Map.Entry<String, List<Entry>> entry : subsubdirs.entrySet()) {
entries.addAll(entry.getValue());
}
value = new RDFDescription(entries);
}
else {
value = getChildTextValue(node);
}
}
dir.add(new XMPEntry(node.getNamespaceURI() + node.getLocalName(), node.getLocalName(), value));
}
}
List<Directory> entries = new ArrayList<>(subdirs.size());
// TODO: Should we still allow asking for a subdirectory by item id?
for (Map.Entry<String, List<Entry>> entry : subdirs.entrySet()) {
entries.add(new RDFDescription(entry.getKey(), entry.getValue()));
}
return new XMPDirectory(entries, toolkit);
}
private boolean isResourceType(Node node) {
Node parseType = node.getAttributes().getNamedItemNS(XMP.NS_RDF, "parseType");
return parseType != null && "Resource".equals(parseType.getNodeValue());
}
private RDFDescription parseAsResource(Node node) {
// See: http://www.w3.org/TR/REC-rdf-syntax/#section-Syntax-parsetype-resource
List<Entry> entries = new ArrayList<>();
for (Node child : asIterable(node.getChildNodes())) {
if (child.getNodeType() != Node.ELEMENT_NODE) {
continue;
}
entries.add(new XMPEntry(child.getNamespaceURI() + child.getLocalName(), child.getLocalName(), getChildTextValue(child)));
}
return new RDFDescription(entries);
}
private void parseAttributesForKnownElements(Map<String, List<Entry>> subdirs, Node desc) {
// NOTE: NamedNodeMap does not have any particular order...
NamedNodeMap attributes = desc.getAttributes();
for (Node attr : asIterable(attributes)) {
if (!XMP.ELEMENTS.contains(attr.getNamespaceURI())) {
continue;
}
List<Entry> dir = subdirs.get(attr.getNamespaceURI());
if (dir == null) {
dir = new ArrayList<>();
subdirs.put(attr.getNamespaceURI(), dir);
}
dir.add(new XMPEntry(attr.getNamespaceURI() + attr.getLocalName(), attr.getLocalName(), attr.getNodeValue()));
}
}
private Object getChildTextValue(final Node node) {
for (Node child : asIterable(node.getChildNodes())) {
if (XMP.NS_RDF.equals(child.getNamespaceURI()) && "Alt".equals(child.getLocalName())) {
// Support for <rdf:Alt><rdf:li> -> return a Map<String, Object> keyed on xml:lang
Map<String, Object> alternatives = new LinkedHashMap<>();
for (Node alternative : asIterable(child.getChildNodes())) {
if (XMP.NS_RDF.equals(alternative.getNamespaceURI()) && "li".equals(alternative.getLocalName())) {
NamedNodeMap attributes = alternative.getAttributes();
Node key = attributes.getNamedItem("xml:lang");
alternatives.put(key == null ? null : key.getTextContent(), getChildTextValue(alternative));
}
}
return alternatives;
}
else if (XMP.NS_RDF.equals(child.getNamespaceURI()) && ("Seq".equals(child.getLocalName()) || "Bag".equals(child.getLocalName()))) {
// Support for <rdf:Seq><rdf:li> -> return array
// Support for <rdf:Bag><rdf:li> -> return array/unordered collection (how can a serialized collection not have order?)
List<Object> seq = new ArrayList<>();
for (Node sequence : asIterable(child.getChildNodes())) {
if (XMP.NS_RDF.equals(sequence.getNamespaceURI()) && "li".equals(sequence.getLocalName())) {
Object value = getChildTextValue(sequence);
seq.add(value);
}
}
// TODO: Strictly a bag should not be a list, but there's no Bag type (or similar) in Java.
// Consider something like Google collections Multiset or Apache commons Bag (the former seems more well-defined)
// Note: Collection does not have defined equals() semantics, and so using
// Collections.unmodifiableCollection() doesn't work for comparing values (uses Object.equals())
return Collections.unmodifiableList(seq);
}
}
// Need to support rdf:parseType="Resource" here as well...
if (isResourceType(node)) {
return parseAsResource(node);
}
Node child = node.getFirstChild();
String strVal = child != null ? child.getNodeValue() : null;
return strVal != null ? strVal.trim() : "";
}
private Iterable<? extends Node> asIterable(final NamedNodeMap pNodeList) {
return new Iterable<Node>() {
public Iterator<Node> iterator() {
return new Iterator<Node>() {
private int index;
public boolean hasNext() {
return pNodeList != null && pNodeList.getLength() > index;
}
public Node next() {
return pNodeList.item(index++);
}
public void remove() {
throw new UnsupportedOperationException("Method remove not supported");
}
};
}
};
}
private Iterable<? extends Node> asIterable(final NodeList pNodeList) {
return new Iterable<Node>() {
public Iterator<Node> iterator() {
return new Iterator<Node>() {
private int index;
public boolean hasNext() {
return pNodeList != null && pNodeList.getLength() > index;
}
public Node next() {
return pNodeList.item(index++);
}
public void remove() {
throw new UnsupportedOperationException("Method remove not supported");
}
};
}
};
}
}