// FrameworkConfig.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config.loader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.tika.config.JsonConfig;
/**
 * Extracts framework-level configuration from component JSON,
 * separating fields prefixed with underscore from component-specific config.
 *
 * <p>Framework fields (underscore prefix):
 * <ul>
 *   <li>{@code _mime-include} - Only handle these mime types</li>
 *   <li>{@code _mime-exclude} - Don't handle these mime types</li>
 * </ul>
 *
 * <p>Instances are created through {@link #extract(JsonNode, ObjectMapper)};
 * the constructor is private.
 */
public class FrameworkConfig {

    private static final String MIME_INCLUDE_KEY = "_mime-include";
    private static final String MIME_EXCLUDE_KEY = "_mime-exclude";

    // Plain JSON mapper for converting JsonNodes to JSON strings.
    // This is needed because the main mapper may use a binary format (e.g., Smile)
    // which doesn't support writeValueAsString().
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    private final ParserDecoration decoration;
    private final JsonConfig componentConfigJson;
    private final JsonNode componentConfigNode;

    private FrameworkConfig(ParserDecoration decoration, JsonConfig componentConfigJson,
                            JsonNode componentConfigNode) {
        this.decoration = decoration;
        this.componentConfigJson = componentConfigJson;
        this.componentConfigNode = componentConfigNode;
    }

    /**
     * Extracts framework config from a JSON node, returning the cleaned component config.
     *
     * <p>If {@code configNode} is {@code null} or not a JSON object, nothing is extracted:
     * the result has no decoration and exposes the node exactly as it was passed in.
     * Otherwise the node is deep-copied (the caller's node is never modified), the
     * {@code _mime-include} and {@code _mime-exclude} fields are removed from the copy,
     * and the remaining fields become the component config.
     *
     * <p>Each mime field may be a single string or an array; non-string array entries and
     * values of any other type are ignored. A decoration is only created when at least one
     * mime type was found.
     *
     * @param configNode the configuration JSON node, may be {@code null}
     * @param objectMapper currently not used by this method (serialization always goes
     *                     through an internal plain JSON mapper); kept in the signature
     *                     for existing callers
     * @return the framework config
     * @throws IOException if JSON processing fails
     */
    public static FrameworkConfig extract(JsonNode configNode,
                                          ObjectMapper objectMapper) throws IOException {
        if (configNode == null || !configNode.isObject()) {
            return new FrameworkConfig(null, toJsonConfig(configNode), configNode);
        }

        // Work on a copy so that removing framework keys does not alter the caller's tree.
        ObjectNode objNode = (ObjectNode) configNode.deepCopy();

        // Extract mime filtering config (framework-level, underscore prefix)
        List<String> mimeInclude = parseStringList(objNode.remove(MIME_INCLUDE_KEY));
        List<String> mimeExclude = parseStringList(objNode.remove(MIME_EXCLUDE_KEY));

        ParserDecoration decoration = null;
        if (!mimeInclude.isEmpty() || !mimeExclude.isEmpty()) {
            decoration = new ParserDecoration(mimeInclude, mimeExclude);
        }

        // Remaining fields are component-specific config
        return new FrameworkConfig(decoration, toJsonConfig(objNode), objNode);
    }

    /**
     * Serializes the node to a JSON string once and wraps that string in a {@link JsonConfig}.
     * Uses the plain JSON mapper since the main mapper may be binary (Smile).
     */
    private static JsonConfig toJsonConfig(JsonNode node) throws IOException {
        String jsonString = JSON_MAPPER.writeValueAsString(node);
        return () -> jsonString;
    }

    /**
     * Reads a list of strings from a node that is either a single text value or an array.
     * Non-text array entries, and nodes that are neither text nor array, contribute nothing.
     *
     * @param node the node to read, or {@code null} if the field was absent
     * @return the strings found, never {@code null}
     */
    private static List<String> parseStringList(JsonNode node) {
        if (node == null) {
            return Collections.emptyList();
        }
        List<String> result = new ArrayList<>();
        if (node.isArray()) {
            for (JsonNode item : node) {
                if (item.isTextual()) {
                    result.add(item.asText());
                }
            }
        } else if (node.isTextual()) {
            result.add(node.asText());
        }
        return result;
    }

    /**
     * Returns the mime filtering decoration.
     *
     * @return the decoration, or {@code null} when the config was not a JSON object or
     *         contained no mime include/exclude entries
     */
    public ParserDecoration getDecoration() {
        return decoration;
    }

    /**
     * Returns the component config (framework fields removed) as a JSON string holder.
     *
     * @return the component config JSON
     */
    public JsonConfig getComponentConfigJson() {
        return componentConfigJson;
    }

    /**
     * Returns the component config as a JSON tree.
     *
     * @return for object input, a copy of the input without the framework fields; for any
     *         other input, the node that was passed to {@code extract} (possibly {@code null})
     */
    public JsonNode getComponentConfigNode() {
        return componentConfigNode;
    }

    /**
     * Parser decoration configuration for mime type filtering.
     *
     * <p>The lists are copied on construction and exposed as unmodifiable lists.
     */
    public static class ParserDecoration {

        private final List<String> mimeInclude;
        private final List<String> mimeExclude;

        /**
         * Creates a decoration from the given mime type lists.
         *
         * @param mimeInclude mime types to include; {@code null} is treated as empty
         * @param mimeExclude mime types to exclude; {@code null} is treated as empty
         */
        public ParserDecoration(List<String> mimeInclude, List<String> mimeExclude) {
            this.mimeInclude = immutableCopy(mimeInclude);
            this.mimeExclude = immutableCopy(mimeExclude);
        }

        /**
         * Snapshots the list so that later changes to the caller's list cannot leak into
         * this object; an unmodifiable wrapper alone would only be a read-only view.
         */
        private static List<String> immutableCopy(List<String> source) {
            if (source == null || source.isEmpty()) {
                return Collections.emptyList();
            }
            return Collections.unmodifiableList(new ArrayList<>(source));
        }

        /**
         * @return the unmodifiable list of mime types to include, never {@code null}
         */
        public List<String> getMimeInclude() {
            return mimeInclude;
        }

        /**
         * @return the unmodifiable list of mime types to exclude, never {@code null}
         */
        public List<String> getMimeExclude() {
            return mimeExclude;
        }

        /**
         * @return {@code true} if at least one include or exclude mime type is configured
         */
        public boolean hasFiltering() {
            return !mimeInclude.isEmpty() || !mimeExclude.isEmpty();
        }
    }
}