TikaCLIAsyncTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.cli;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.tika.config.JsonConfigHelper;

public class TikaCLIAsyncTest {

    private static final Logger LOG = LoggerFactory.getLogger(TikaCLI.class);

    static final File TEST_DATA_FILE = new File("src/test/resources/test-data");

    /* Test members */
    private ByteArrayOutputStream outContent = null;
    private ByteArrayOutputStream errContent = null;
    private PrintStream stdout = null;
    private PrintStream stderr = null;

    private static Path TIKA_CONFIG;

    @TempDir
    private static Path ASYNC_OUTPUT_DIR;

    @BeforeAll
    public static void setUpClass() throws Exception {
        TIKA_CONFIG = Files.createTempFile(ASYNC_OUTPUT_DIR, "plugins-", ".json");

        Path pluginsDir = Paths.get("target/plugins");
        if (!Files.isDirectory(pluginsDir)) {
            LOG.warn("CAN'T FIND PLUGINS DIR. pwd={}", Paths.get("").toAbsolutePath().toString());
        }

        Map<String, Object> replacements = new HashMap<>();
        replacements.put("FETCHER_BASE_PATH", TEST_DATA_FILE.toPath());
        replacements.put("EMITTER_BASE_PATH", ASYNC_OUTPUT_DIR);
        replacements.put("PLUGIN_ROOTS", pluginsDir);

        JsonConfigHelper.writeConfigFromResource("/configs/config-template.json",
                TikaCLIAsyncTest.class, replacements, TIKA_CONFIG);
    }

    /**
     * reset resourcePrefix
     * save original System.out and System.err
     * clear outContent and errContent if they are not empty
     * set outContent and errContent as System.out and System.err
     */
    @BeforeEach
    public void setUp() throws Exception {
        stdout = System.out;
        stderr = System.err;
        resetContent();
    }

    /**
     * Tears down the test. Returns the System.out and System.err
     */
    @AfterEach
    public void tearDown() {
        System.setOut(stdout);
        System.setErr(stderr);
    }

    /**
     * clear outContent and errContent if they are not empty by create a new one.
     * set outContent and errContent as System.out and System.err
     */
    private void resetContent() throws Exception {
        if (outContent == null || outContent.size() > 0) {
            outContent = new ByteArrayOutputStream();
            System.setOut(new PrintStream(outContent, true, UTF_8.name()));
        }

        if (errContent == null || errContent.size() > 0) {
            errContent = new ByteArrayOutputStream();
            System.setErr(new PrintStream(errContent, true, UTF_8.name()));
        }
    }


    @Test
    public void testAsync() throws Exception {
        //extension is "jsn" to avoid conflict with json config

        String content = getParamOutContent("-a", "-c", TIKA_CONFIG.toAbsolutePath().toString());

        int json = 0;
        for (File f : ASYNC_OUTPUT_DIR
                .toFile()
                .listFiles()) {
            if (f
                    .getName()
                    .endsWith(".jsn")) {
                //check one file for pretty print
                if (f
                        .getName()
                        .equals("coffee.xls.jsn")) {
                    //TODO -- turn this back on
                    // checkForPrettyPrint(f);
                }
                json++;
            }
        }
        assertEquals(18, json);
    }

    private void checkForPrettyPrint(File f) throws IOException {
        String json = FileUtils.readFileToString(f, UTF_8);
        int previous = json.indexOf("Content-Length");
        assertTrue(previous > -1);
        for (String k : new String[]{"Content-Type", "dc:creator", "dcterms:created", "dcterms:modified", "X-TIKA:content\""}) {
            int i = json.indexOf(k);
            assertTrue(i > -1, "should have found " + k);
            assertTrue(i > previous, "bad order: " + k + " at " + i + " not less than " + previous);
            previous = i;
        }
    }

    /**
     * reset outContent and errContent if they are not empty
     * run given params in TikaCLI and return outContent String with UTF-8
     */
    String getParamOutContent(String... params) throws Exception {
        resetContent();
        TikaCLI.main(params);
        return outContent.toString("UTF-8");
    }

}