ComponentInstantiator.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config.loader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashSet;
import java.util.Set;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.tika.config.Initializable;
import org.apache.tika.config.JsonConfig;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.serialization.ComponentNameResolver;
import org.apache.tika.utils.ServiceLoaderUtils;
/**
 * Utility class for instantiating Tika components from JSON configuration.
 * Provides common logic for all component loaders to avoid code duplication.
 * <p>
 * All methods are static and the class keeps no state of its own.
 */
public class ComponentInstantiator {

    /** Decorator field whose values are passed as the include filter to {@link ParserDecorator#withMimeFilters}. */
    private static final String MIME_INCLUDE_FIELD = "_mime-include";

    /** Decorator field whose values are passed as the exclude filter to {@link ParserDecorator#withMimeFilters}. */
    private static final String MIME_EXCLUDE_FIELD = "_mime-exclude";

    /**
     * Instantiates a component with JsonConfig constructor or falls back to zero-arg constructor.
     * <p>
     * Instantiation strategy:
     * <ol>
     *     <li>Use the public constructor taking a single {@link JsonConfig}, if there is one</li>
     *     <li>If there is none and the JSON config has actual configuration, throw an error</li>
     *     <li>Otherwise fall back to {@link ServiceLoaderUtils#newInstance}</li>
     * </ol>
     * In every successful case {@link Initializable#initialize()} is invoked on components
     * that implement {@link Initializable}.
     *
     * @param componentClass    the component class to instantiate
     * @param jsonConfig        the JSON configuration for the component
     * @param classLoader       the class loader to use
     * @param componentTypeName the component type name (e.g., "Detector", "Parser") for error messages
     * @param objectMapper      the Jackson ObjectMapper for parsing JSON
     * @param <T>               the component type
     * @return the instantiated component
     * @throws TikaConfigException if instantiation fails, or if configuration was supplied
     *                             for a class that has no JsonConfig constructor
     */
    @SuppressWarnings("unchecked")
    public static <T> T instantiate(Class<?> componentClass,
                                    JsonConfig jsonConfig,
                                    ClassLoader classLoader,
                                    String componentTypeName,
                                    ObjectMapper objectMapper)
            throws TikaConfigException {
        Constructor<?> jsonConfigCtor = findJsonConfigConstructor(componentClass);
        try {
            T component;
            if (jsonConfigCtor != null) {
                component = (T) jsonConfigCtor.newInstance(jsonConfig);
            } else {
                // Configuration that nobody can consume is a user error, not something to drop silently
                if (hasConfiguration(jsonConfig, objectMapper)) {
                    throw new TikaConfigException(
                            componentTypeName + " '" + componentClass.getName() + "' has configuration in JSON, " +
                                    "but does not have a constructor that accepts JsonConfig. " +
                                    "Please add a constructor: public " + componentClass.getSimpleName() +
                                    "(JsonConfig jsonConfig)");
                }
                // Fall back to zero-arg constructor if no configuration provided
                component = (T) ServiceLoaderUtils.newInstance(componentClass,
                        new org.apache.tika.config.ServiceLoader(classLoader));
            }
            // Call initialize() on Initializable components
            initializeIfNeeded(component);
            return component;
        } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
            throw new TikaConfigException("Failed to instantiate " + componentTypeName + ": " +
                    componentClass.getName(), e);
        }
    }

    /**
     * Instantiates a component from a JsonNode configuration.
     * <p>
     * Instantiation strategy:
     * <ol>
     *     <li>Use the public constructor taking a single {@link JsonConfig}, if there is one;
     *         a null {@code configNode} is passed to it as {@code "{}"}</li>
     *     <li>Otherwise create the component with its no-arg constructor and, when the config
     *         node is non-null and non-empty, apply it with Jackson's {@code readerForUpdating}</li>
     * </ol>
     * In both cases {@link Initializable#initialize()} is invoked on components that
     * implement {@link Initializable}.
     *
     * @param componentClass the component class to instantiate
     * @param configNode     the JSON configuration node (may be null or empty)
     * @param objectMapper   the Jackson ObjectMapper for deserialization
     * @param <T>            the component type
     * @return the instantiated component
     * @throws TikaConfigException if instantiation fails
     */
    @SuppressWarnings("unchecked")
    public static <T> T instantiate(Class<?> componentClass,
                                    JsonNode configNode,
                                    ObjectMapper objectMapper)
            throws TikaConfigException {
        try {
            T component;
            Constructor<?> jsonConfigCtor = findJsonConfigConstructor(componentClass);
            if (jsonConfigCtor != null) {
                String jsonString = configNode != null ? configNode.toString() : "{}";
                JsonConfig jsonConfig = () -> jsonString;
                component = (T) jsonConfigCtor.newInstance(jsonConfig);
            } else {
                // No-arg constructor + readerForUpdating preserves defaults set by the
                // constructor, unlike treeToValue which would null out unspecified fields.
                component = (T) componentClass.getDeclaredConstructor().newInstance();
                if (configNode != null && !configNode.isEmpty()) {
                    objectMapper.readerForUpdating(component).readValue(configNode);
                }
            }
            // Initialize on every path, matching the other instantiation entry points
            initializeIfNeeded(component);
            return component;
        } catch (TikaConfigException e) {
            throw e;
        } catch (Exception e) {
            // InvocationTargetException has a null message of its own; report the
            // constructor's failure instead while keeping the full chain as the cause.
            Throwable detail = e;
            if (e instanceof InvocationTargetException && e.getCause() != null) {
                detail = e.getCause();
            }
            throw new TikaConfigException(
                    "Failed to instantiate component '" + componentClass.getName() + "': " +
                            detail.getMessage(), e);
        }
    }

    /**
     * Instantiates a component by resolving a friendly name or FQCN to a class.
     * <p>
     * This is a convenience method that combines name resolution via
     * {@link ComponentNameResolver#resolveClass} with
     * {@link #instantiate(Class, JsonNode, ObjectMapper)}.
     *
     * @param typeName     the component type name (friendly name like "pdf-parser" or FQCN)
     * @param configNode   the JSON configuration node (may be null or empty)
     * @param objectMapper the Jackson ObjectMapper for deserialization
     * @param classLoader  the class loader for name resolution
     * @param <T>          the component type
     * @return the instantiated component
     * @throws TikaConfigException if instantiation fails or type name is unknown
     */
    public static <T> T instantiate(String typeName,
                                    JsonNode configNode,
                                    ObjectMapper objectMapper,
                                    ClassLoader classLoader)
            throws TikaConfigException {
        Class<?> componentClass;
        try {
            componentClass = ComponentNameResolver.resolveClass(typeName, classLoader);
        } catch (ClassNotFoundException e) {
            throw new TikaConfigException("Unknown component type: '" + typeName + "'", e);
        }
        return instantiate(componentClass, configNode, objectMapper);
    }

    /**
     * Instantiates a Tika component with full special-case handling.
     * <p>
     * Handles, in this order:
     * <ul>
     *     <li>Type resolution via {@link ComponentNameResolver#resolveClass}</li>
     *     <li>Type compatibility validation against expectedType</li>
     *     <li>Rejection of {@link DefaultParser} and {@link DefaultDetector}</li>
     *     <li>{@code _mime-include}/{@code _mime-exclude} extraction and stripping</li>
     *     <li>Instantiation: the {@link MimeTypes} default singleton for {@code MimeTypes};
     *         the no-arg constructor when there is no remaining config; otherwise the
     *         JsonConfig constructor if present, else no-arg constructor followed by
     *         {@code readerForUpdating}</li>
     *     <li>{@link Initializable#initialize()} callback</li>
     *     <li>Parser MIME filter wrapping when include or exclude types were given</li>
     * </ul>
     *
     * @param typeName     the component type name (friendly name or FQCN)
     * @param configNode   the JSON configuration node (may be null)
     * @param mapper       the ObjectMapper for deserialization
     * @param classLoader  the class loader for name resolution
     * @param expectedType the expected interface/base type (for validation), or null to skip
     * @param <T>          the component type
     * @return the instantiated component
     * @throws TikaConfigException if the type is unknown or incompatible, a MIME filter entry
     *                             is not a valid media type, or instantiation fails
     */
    @SuppressWarnings("unchecked")
    public static <T> T instantiateComponent(String typeName, JsonNode configNode,
                                             ObjectMapper mapper, ClassLoader classLoader,
                                             Class<?> expectedType)
            throws TikaConfigException {
        // Resolve the class using ComponentNameResolver
        Class<?> clazz;
        try {
            clazz = ComponentNameResolver.resolveClass(typeName, classLoader);
        } catch (ClassNotFoundException e) {
            throw new TikaConfigException("Unknown type: " + typeName, e);
        }

        // Verify type compatibility
        if (expectedType != null && !expectedType.isAssignableFrom(clazz)) {
            throw new TikaConfigException("Type " + typeName + " (" + clazz.getName() +
                    ") is not assignable to " + expectedType.getName());
        }

        // DefaultParser and DefaultDetector must be loaded via TikaLoader
        if (clazz == DefaultParser.class) {
            throw new TikaConfigException("DefaultParser must be loaded via TikaLoader, not " +
                    "directly via Jackson deserialization. Use TikaLoader.load() to load configuration.");
        } else if (clazz == DefaultDetector.class) {
            throw new TikaConfigException("DefaultDetector must be loaded via TikaLoader, not " +
                    "directly via Jackson deserialization. Use TikaLoader.load() to load configuration.");
        }

        // Extract mime filter fields before stripping them
        Set<MediaType> includeTypes = extractMimeTypes(configNode, MIME_INCLUDE_FIELD);
        Set<MediaType> excludeTypes = extractMimeTypes(configNode, MIME_EXCLUDE_FIELD);

        // Strip decorator fields before passing to component
        JsonNode cleanedConfig = stripDecoratorFields(configNode);

        try {
            Object instance;
            if (clazz == MimeTypes.class) {
                // MimeTypes must use the singleton to have all type definitions loaded
                instance = MimeTypes.getDefaultMimeTypes();
            } else if (cleanedConfig == null || cleanedConfig.isEmpty()) {
                // If no config, use default constructor
                instance = clazz.getDeclaredConstructor().newInstance();
            } else {
                // Try JsonConfig constructor first
                Constructor<?> jsonConfigCtor = findJsonConfigConstructor(clazz);
                if (jsonConfigCtor != null) {
                    // Use plain JSON mapper since the main mapper may be binary (Smile)
                    String json = TikaObjectMapperFactory.getPlainMapper()
                            .writeValueAsString(cleanedConfig);
                    instance = jsonConfigCtor.newInstance((JsonConfig) () -> json);
                } else {
                    // Fall back to no-arg constructor + Jackson bean deserialization
                    instance = clazz.getDeclaredConstructor().newInstance();
                    mapper.readerForUpdating(instance).readValue(cleanedConfig);
                }
            }

            // Call initialize() on Initializable components
            initializeIfNeeded(instance);

            // Wrap parser with mime filtering if include/exclude types specified
            if (instance instanceof Parser && (!includeTypes.isEmpty() || !excludeTypes.isEmpty())) {
                instance = ParserDecorator.withMimeFilters(
                        (Parser) instance, includeTypes, excludeTypes);
            }
            return (T) instance;
        } catch (TikaConfigException e) {
            throw e;
        } catch (Exception e) {
            throw new TikaConfigException("Failed to instantiate: " + typeName, e);
        }
    }

    /**
     * Reads the media types listed under {@code fieldName} in the config node.
     * <p>
     * Returns an empty set when the node is null, the field is absent, or the field is
     * not a JSON array.
     *
     * @param configNode the component configuration (may be null)
     * @param fieldName  the name of the array field to read
     * @return a mutable set of the parsed media types, never null
     * @throws TikaConfigException if an array entry cannot be parsed as a media type
     */
    private static Set<MediaType> extractMimeTypes(JsonNode configNode, String fieldName)
            throws TikaConfigException {
        Set<MediaType> types = new HashSet<>();
        if (configNode == null || !configNode.has(fieldName)) {
            return types;
        }
        JsonNode arrayNode = configNode.get(fieldName);
        if (!arrayNode.isArray()) {
            return types;
        }
        for (JsonNode typeNode : arrayNode) {
            String raw = typeNode.asText();
            MediaType parsed = MediaType.parse(raw);
            // MediaType.parse signals an unparseable string with null; fail fast rather
            // than letting a null entry end up in the filter set.
            if (parsed == null) {
                throw new TikaConfigException(
                        "Invalid media type '" + raw + "' in '" + fieldName + "'");
            }
            types.add(parsed);
        }
        return types;
    }

    /**
     * Looks up the public constructor that takes a single {@link JsonConfig} argument.
     *
     * @param clazz the class to inspect
     * @return the constructor, or null if the class does not declare one
     */
    private static Constructor<?> findJsonConfigConstructor(Class<?> clazz) {
        try {
            return clazz.getConstructor(JsonConfig.class);
        } catch (NoSuchMethodException e) {
            return null;
        }
    }

    /**
     * Strips decorator fields (_mime-include, _mime-exclude) from config node.
     * These fields drive the MIME filter wrapping and are not meant for the component itself.
     * Note: _exclude is NOT stripped as it's used by DefaultParser for SPI exclusions.
     *
     * @param configNode the original configuration (may be null)
     * @return the node itself when it is null or not a JSON object; otherwise a deep copy
     *         without the decorator fields (the input node is not modified)
     */
    private static JsonNode stripDecoratorFields(JsonNode configNode) {
        if (configNode == null || !configNode.isObject()) {
            return configNode;
        }
        ObjectNode cleaned = configNode.deepCopy();
        cleaned.remove(MIME_INCLUDE_FIELD);
        cleaned.remove(MIME_EXCLUDE_FIELD);
        return cleaned;
    }

    /**
     * Checks if the JsonConfig contains actual configuration (non-empty JSON object with fields).
     *
     * @param jsonConfig   the JSON configuration
     * @param objectMapper the Jackson ObjectMapper for parsing JSON
     * @return true if the JSON is an object with at least one field, or if it cannot be
     *         parsed at all; false if it is null, blank, an empty object, or not an object
     */
    public static boolean hasConfiguration(JsonConfig jsonConfig, ObjectMapper objectMapper) {
        if (jsonConfig == null) {
            return false;
        }
        String json = jsonConfig.json();
        if (json == null || json.trim().isEmpty()) {
            return false;
        }
        try {
            JsonNode node = objectMapper.readTree(json);
            return node.isObject() && node.size() > 0;
        } catch (Exception e) {
            // If we can't parse it, assume it has configuration to be safe
            return true;
        }
    }

    /**
     * Calls initialize() on the component if it implements Initializable.
     *
     * @param component the component to initialize (null is ignored)
     * @throws TikaConfigException if initialization fails
     */
    private static void initializeIfNeeded(Object component) throws TikaConfigException {
        if (component instanceof Initializable) {
            ((Initializable) component).initialize();
        }
    }
}