TikaToXMPTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.xmp;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import com.adobe.internal.xmp.XMPConst;
import com.adobe.internal.xmp.XMPException;
import com.adobe.internal.xmp.XMPIterator;
import com.adobe.internal.xmp.XMPMeta;
import com.adobe.internal.xmp.XMPMetaFactory;
import com.adobe.internal.xmp.properties.XMPProperty;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.xmp.convert.ITikaToXMPConverter;
import org.apache.tika.xmp.convert.MSOfficeXMLConverter;
import org.apache.tika.xmp.convert.TikaToXMP;

/**
 * Tests the Tika <code>Metadata</code> to XMP conversion functionality
 */
public class TikaToXMPTest {
    private static final String OOXML_MIMETYPE =
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    private static final String GENERIC_MIMETYPE = "generic/mimetype";
    private Metadata tikaMetadata;

    // --- Set up ---

    /**
     * Gives every test a fresh, empty Tika metadata container.
     */
    @BeforeEach
    public void setup() {
        tikaMetadata = new Metadata();
    }

    /**
     * Fills the given metadata with the properties that the OOXML related tests convert.
     *
     * @param metadata the Tika metadata container to populate
     */
    private void setupOOXMLMetadata(Metadata metadata) {
        metadata.set(TikaCoreProperties.LANGUAGE, "language");        // simple property
        metadata.set(TikaCoreProperties.TITLE, "title");              // language alternative
        metadata.set(TikaCoreProperties.SUBJECT, "keyword1,keyword2"); // comma separated array
        metadata.set(TikaCoreProperties.MODIFIER, "lastModifiedBy");  // OOXML specific simple prop
    }

    // --- Assertion helpers ---

    /**
     * Asserts that the property exists and carries the expected value.
     *
     * @param expected the value the property must hold
     * @param actual   the property read from the XMP object, must not be <code>null</code>
     */
    private static void assertValue(String expected, XMPProperty actual) {
        assertNotNull(actual);
        assertEquals(expected, actual.getValue());
    }

    /**
     * Asserts that the x-default entry of the <code>dc:title</code> language alternative
     * is "title".
     *
     * @param xmp the converted XMP object
     * @throws XMPException if the XMP object cannot be queried
     */
    private static void assertDefaultTitle(XMPMeta xmp) throws XMPException {
        assertValue("title",
                xmp.getLocalizedText(XMPConst.NS_DC, "title", null, XMPConst.X_DEFAULT));
    }

    /**
     * Asserts that the first two items of the <code>dc:subject</code> array are
     * "keyword1" and "keyword2", in that order.
     *
     * @param xmp the converted XMP object
     * @throws XMPException if the XMP object cannot be queried
     */
    private static void assertSubjectKeywords(XMPMeta xmp) throws XMPException {
        assertValue("keyword1", xmp.getArrayItem(XMPConst.NS_DC, "subject", 1));
        assertValue("keyword2", xmp.getArrayItem(XMPConst.NS_DC, "subject", 2));
    }

    /**
     * Verifies that every property written by {@link #setupOOXMLMetadata(Metadata)}
     * is present in the converted XMP object.
     *
     * @param xmp the converted XMP object
     * @throws XMPException if the XMP object cannot be queried
     */
    private void checkOOXMLMetadata(XMPMeta xmp) throws XMPException {
        // simple property
        assertValue("language", xmp.getProperty(XMPConst.NS_DC, "language"));
        // language alternative
        assertDefaultTitle(xmp);
        // array
        assertSubjectKeywords(xmp);
        // OOXML specific simple property
        assertValue("lastModifiedBy",
                xmp.getProperty(OfficeOpenXMLCore.NAMESPACE_URI, "lastModifiedBy"));
    }

    // --- TESTS ---

    /**
     * The mime type is taken from the content type stored inside the metadata.
     */
    @Test
    public void convert_OOXMLMetadataWithMimetype_everythingConverted()
            throws XMPException, TikaException {
        setupOOXMLMetadata(tikaMetadata);
        tikaMetadata.set(Metadata.CONTENT_TYPE, OOXML_MIMETYPE);

        checkOOXMLMetadata(TikaToXMP.convert(tikaMetadata));
    }

    /**
     * The mime type is handed to the conversion as a separate argument.
     */
    @Test
    public void convert_OOXMLMetadataWithExtraMimetype_everythingConverted()
            throws XMPException, TikaException {
        setupOOXMLMetadata(tikaMetadata);

        checkOOXMLMetadata(TikaToXMP.convert(tikaMetadata, OOXML_MIMETYPE));
    }

    /**
     * Without any mime type only the general metadata ends up in the XMP object.
     */
    @Test
    public void convert_OOXMLMetadataWithoutMimetype_onlyGeneralMetadataconverted()
            throws XMPException, TikaException {
        setupOOXMLMetadata(tikaMetadata);

        XMPMeta converted = TikaToXMP.convert(tikaMetadata, null);

        // General metadata is converted.
        // NOTE(review): dc:language is read as the first array item here, whereas
        // checkOOXMLMetadata reads it with getProperty - presumably the conversion
        // without a mime type stores it as an array; confirm.
        assertValue("language", converted.getArrayItem(XMPConst.NS_DC, "language", 1));
        assertDefaultTitle(converted);

        // The OOXML property is not converted. Its namespace is registered by hand
        // before the lookup because the converter has not been used to register it.
        XMPMetaFactory.getSchemaRegistry()
                .registerNamespace(OfficeOpenXMLCore.NAMESPACE_URI, OfficeOpenXMLCore.PREFIX);
        assertNull(converted.getProperty(OfficeOpenXMLCore.NAMESPACE_URI, "lastModifiedBy"));
    }

    /**
     * Fully qualified generic metadata is converted even though no mime type is given.
     */
    @Test
    public void convert_genericMetadataAllQualified_allConverted()
            throws XMPException, TikaException {
        tikaMetadata.set(TikaCoreProperties.FORMAT, GENERIC_MIMETYPE);  // simple property
        tikaMetadata.set(TikaCoreProperties.TITLE, "title");            // language alternative
        String[] keywords = {"keyword1", "keyword2"};                   // array
        tikaMetadata.set(TikaCoreProperties.SUBJECT, keywords);

        XMPMeta converted = TikaToXMP.convert(tikaMetadata, null);

        assertValue(GENERIC_MIMETYPE, converted.getProperty(XMPConst.NS_DC, "format"));
        assertDefaultTitle(converted);
        assertSubjectKeywords(converted);
    }

    /**
     * Keys with an unknown prefix or without any prefix leave the XMP object empty.
     */
    @Test
    public void convert_wrongGenericMetadata_notConverted() throws XMPException, TikaException {
        tikaMetadata.set("unknown:key", "unknownPrefixValue"); // unknown prefix
        tikaMetadata.set("wrongKey", "wrongKeyValue");         // not qualified key

        XMPIterator entries = TikaToXMP.convert(tikaMetadata, null).iterator();

        // XMP is empty
        assertFalse(entries.hasNext());
    }

    /**
     * Converting <code>null</code> metadata is rejected with an IllegalArgumentException.
     */
    @Test
    public void convert_nullInput_throw() throws TikaException {
        assertThrows(IllegalArgumentException.class, () -> TikaToXMP.convert(null));
    }

    @Test
    public void isConverterAvailable_availableMime_true() {
        boolean available = TikaToXMP.isConverterAvailable(OOXML_MIMETYPE);
        assertTrue(available);
    }

    @Test
    public void isConverterAvailable_noAvailableMime_false() {
        boolean available = TikaToXMP.isConverterAvailable(GENERIC_MIMETYPE);
        assertFalse(available);
    }

    @Test
    public void isConverterAvailable_nullInput_false() {
        boolean available = TikaToXMP.isConverterAvailable(null);
        assertFalse(available);
    }

    /**
     * The OOXML mime type yields a non-null converter of type MSOfficeXMLConverter.
     */
    @Test
    public void getConverter_ConverterAvailable_class() throws TikaException {
        ITikaToXMPConverter ooxmlConverter = TikaToXMP.getConverter(OOXML_MIMETYPE);
        assertNotNull(ooxmlConverter);
        assertTrue(ooxmlConverter instanceof MSOfficeXMLConverter);
    }

    @Test
    public void getConverter_noConverterAvailable_null() throws TikaException {
        assertNull(TikaToXMP.getConverter(GENERIC_MIMETYPE));
    }

    /**
     * Asking for the converter of a <code>null</code> mime type is rejected with an
     * IllegalArgumentException.
     */
    @Test
    public void getConverter_nullInput_throw() throws TikaException {
        assertThrows(IllegalArgumentException.class, () -> TikaToXMP.getConverter(null));
    }

    /**
     * Runs the OOXML conversion and its checks repeatedly on several threads at once.
     * Any failure inside a worker surfaces through <code>Future.get()</code>; a worker
     * that does not finish within one minute fails the test with a TimeoutException.
     */
    @Test
    public void testMultithreaded() throws Exception {
        final int numThreads = 10;
        final int numIterations = 100;

        // The task holds no state of its own, so one instance is submitted once per thread.
        Callable<Integer> worker = () -> {
            for (int round = 0; round < numIterations; round++) {
                Metadata metadata = new Metadata();
                setupOOXMLMetadata(metadata);
                metadata.set(Metadata.CONTENT_TYPE, OOXML_MIMETYPE);
                checkOOXMLMetadata(TikaToXMP.convert(metadata));
            }
            return 1;
        };

        ExecutorService pool = Executors.newFixedThreadPool(numThreads);
        try {
            ExecutorCompletionService<Integer> completions =
                    new ExecutorCompletionService<>(pool);
            for (int submitted = 0; submitted < numThreads; submitted++) {
                completions.submit(worker);
            }
            for (int finished = 0; finished < numThreads; finished++) {
                Future<Integer> done = completions.poll(1, TimeUnit.MINUTES);
                if (done == null) {
                    throw new TimeoutException();
                }
                // rethrows whatever went wrong inside the worker
                done.get();
            }
        } finally {
            pool.shutdownNow();
        }
    }
}