TikaConfigAsyncWriter.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.async.cli;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.utils.XMLReaderUtils;
class TikaConfigAsyncWriter {
private static final Logger LOG = LoggerFactory.getLogger(TikaAsyncCLI.class);
protected static final String FETCHER_NAME = "fsf";
protected static final String EMITTER_NAME = "fse";
private final SimpleAsyncConfig simpleAsyncConfig;
TikaConfigAsyncWriter(SimpleAsyncConfig simpleAsyncConfig) {
this.simpleAsyncConfig = simpleAsyncConfig;
}
void write(Path output) throws IOException {
try {
_write(output);
} catch (Exception e) {
throw new IOException(e);
}
}
void _write(Path output) throws ParserConfigurationException, TransformerException, IOException, TikaException, SAXException {
Document document = null;
Element properties = null;
if (simpleAsyncConfig.getTikaConfig() != null) {
document = XMLReaderUtils.buildDOM(Paths.get(simpleAsyncConfig.getTikaConfig()));
properties = document.getDocumentElement();
if (! "properties".equals(properties.getLocalName())) {
throw new TikaConfigException("Document element must be '<properties>' in " +
simpleAsyncConfig.getTikaConfig());
}
} else {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
document = dbf.newDocumentBuilder().newDocument();
properties = document.createElement("properties");
document.appendChild(properties);
}
Path baseInput = Paths.get(simpleAsyncConfig.getInputDir());
Path baseOutput = Paths.get(simpleAsyncConfig.getOutputDir());
if (Files.isRegularFile(baseInput)) {
baseInput = baseInput.toAbsolutePath().getParent();
if (baseInput == null) {
throw new IllegalArgumentException("input file must be at least one directory below root");
}
}
Transformer transformer = TransformerFactory
.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
try (Writer writer = Files.newBufferedWriter(output, StandardCharsets.UTF_8)) {
StreamResult result = new StreamResult(writer);
DOMSource source = new DOMSource(document);
transformer.transform(source, result);
}
}
}