AbstractSpiComponentLoader.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config.loader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.fasterxml.jackson.databind.JsonNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.serialization.ComponentNameResolver;
/**
* Base loader for components that support SPI fallback with exclusions.
* <p>
* Handles the common pattern for loading Parsers, Detectors, and EncodingDetectors:
* <ol>
* <li>Check if config section exists</li>
* <li>Find "default-xxx" marker and extract exclusions</li>
* <li>Load explicitly configured components</li>
* <li>Auto-exclude configured component classes from SPI</li>
* <li>Create Default* composite with combined exclusions</li>
* <li>Post-process (e.g., inject dependencies)</li>
* </ol>
*
* @param <T> the component type (Parser, Detector, EncodingDetector)
*/
public abstract class AbstractSpiComponentLoader<T> implements ComponentLoader<T> {
private static final Logger LOG = LoggerFactory.getLogger(AbstractSpiComponentLoader.class);
private final String sectionName;
private final String defaultMarkerName;
private final Class<T> componentClass;
/**
* Creates a new SPI component loader.
*
* @param sectionName the JSON config section name (e.g., "parsers")
* @param defaultMarkerName the default marker name (e.g., "default-parser")
* @param componentClass the component interface class
*/
protected AbstractSpiComponentLoader(String sectionName, String defaultMarkerName,
Class<T> componentClass) {
this.sectionName = sectionName;
this.defaultMarkerName = defaultMarkerName;
this.componentClass = componentClass;
}
@Override
public T load(TikaJsonConfig config, LoaderContext context) throws TikaConfigException {
if (!config.hasComponentSection(sectionName)) {
// No config section - use full SPI default
T defaultComposite = createDefaultComposite(Collections.emptySet(), context);
return postProcess(defaultComposite, context);
}
List<Map.Entry<String, JsonNode>> entries = config.getArrayComponents(sectionName);
if (entries.isEmpty()) {
T defaultComposite = createDefaultComposite(Collections.emptySet(), context);
return postProcess(defaultComposite, context);
}
// First pass: find default marker and its exclusions
DefaultMarkerConfig<T> markerConfig = findDefaultMarker(entries, context);
// Second pass: load configured components
List<T> components = new ArrayList<>();
Set<Class<? extends T>> configuredClasses = new HashSet<>();
for (Map.Entry<String, JsonNode> entry : entries) {
String name = entry.getKey();
if (defaultMarkerName.equals(name)) {
continue; // Skip marker, handled separately
}
// Check for special handling (e.g., "mime-types" for detectors)
T special = handleSpecialName(name, entry.getValue(), context);
if (special != null) {
components.add(special);
configuredClasses.add(unwrapClass(special));
continue;
}
T component = loadComponent(name, entry.getValue(), context);
components.add(component);
configuredClasses.add(unwrapClass(component));
}
// Combine exclusions: explicit from config + auto (configured classes)
Set<Class<? extends T>> allExclusions = new HashSet<>(markerConfig.exclusions());
allExclusions.addAll(configuredClasses);
// Add SPI components if default marker present
if (markerConfig.present()) {
T defaultComposite = createDefaultComposite(allExclusions, context);
// Allow subclasses to decorate (e.g., mime filtering for parsers)
defaultComposite = decorateDefaultComposite(defaultComposite,
markerConfig.configNode(), context);
if (components.isEmpty()) {
return postProcess(defaultComposite, context);
}
// Insert at marker position to preserve ordering
int insertIndex = Math.min(markerConfig.index(), components.size());
components.add(insertIndex, defaultComposite);
LOG.debug("Loading SPI {} because '{}' is in config", sectionName, defaultMarkerName);
} else {
LOG.debug("Skipping SPI {} - '{}' not in config", sectionName, defaultMarkerName);
}
// Post-process all components (e.g., inject dependencies)
components = postProcessList(components, context);
return wrapInComposite(components, context);
}
// ==================== Abstract methods for subclasses ====================
/**
* Load a single component from config.
* Subclasses can apply decorations (e.g., mime filtering for parsers).
*
* @param name the component name (friendly name or FQCN)
* @param configNode the JSON configuration for this component
* @param context the loader context
* @return the loaded component
* @throws TikaConfigException if loading fails
*/
protected abstract T loadComponent(String name, JsonNode configNode,
LoaderContext context) throws TikaConfigException;
/**
* Create the SPI-backed default composite with exclusions.
* E.g., new DefaultParser(..., exclusions) or new DefaultDetector(..., exclusions)
*
* @param exclusions classes to exclude from SPI loading
* @param context the loader context
* @return the default composite
*/
protected abstract T createDefaultComposite(Set<Class<? extends T>> exclusions,
LoaderContext context);
/**
* Wrap a list of components in a composite.
* E.g., new CompositeParser(registry, list) or new CompositeDetector(registry, list)
*
* @param components the list of components
* @param context the loader context
* @return the composite component
*/
protected abstract T wrapInComposite(List<T> components, LoaderContext context);
// ==================== Optional hooks for subclasses ====================
/**
* Post-process a single component (e.g., inject dependencies).
* Default: returns the component unchanged.
*
* @param component the component to post-process
* @param context the loader context
* @return the post-processed component
* @throws TikaConfigException if post-processing fails
*/
protected T postProcess(T component, LoaderContext context) throws TikaConfigException {
return component;
}
/**
* Post-process a list of components (e.g., inject dependencies).
* Default: calls postProcess on each component.
*
* @param components the components to post-process
* @param context the loader context
* @return the post-processed components
* @throws TikaConfigException if post-processing fails
*/
protected List<T> postProcessList(List<T> components, LoaderContext context)
throws TikaConfigException {
List<T> result = new ArrayList<>();
for (T component : components) {
result.add(postProcess(component, context));
}
return result;
}
/**
* Handle special component names that require custom loading.
* E.g., "mime-types" for detectors returns TikaLoader.getMimeTypes().
* Return null for normal handling.
*
* @param name the component name
* @param configNode the JSON configuration
* @param context the loader context
* @return the special component, or null for normal handling
* @throws TikaConfigException if loading fails
*/
protected T handleSpecialName(String name, JsonNode configNode,
LoaderContext context) throws TikaConfigException {
return null;
}
/**
* Decorate the default composite with additional behavior.
* E.g., for parsers, apply mime filtering from _mime-include/_mime-exclude.
* Default: returns the composite unchanged.
*
* @param composite the default composite
* @param configNode the JSON configuration for the default marker (may be null)
* @param context the loader context
* @return the decorated composite
* @throws TikaConfigException if decoration fails
*/
protected T decorateDefaultComposite(T composite, JsonNode configNode,
LoaderContext context) throws TikaConfigException {
return composite;
}
// ==================== Shared implementation ====================
private DefaultMarkerConfig<T> findDefaultMarker(List<Map.Entry<String, JsonNode>> entries,
LoaderContext context) {
int index = 0;
for (Map.Entry<String, JsonNode> entry : entries) {
if (defaultMarkerName.equals(entry.getKey())) {
Set<Class<? extends T>> exclusions =
parseExclusions(entry.getValue(), context);
return new DefaultMarkerConfig<>(true, index, exclusions, entry.getValue());
}
index++;
}
return new DefaultMarkerConfig<>(false, -1, Collections.emptySet(), null);
}
@SuppressWarnings("unchecked")
private Set<Class<? extends T>> parseExclusions(JsonNode configNode,
LoaderContext context) {
Set<Class<? extends T>> exclusions = new HashSet<>();
if (configNode == null || !configNode.isObject()) {
return exclusions;
}
JsonNode excludeNode = configNode.get("exclude");
if (excludeNode == null || !excludeNode.isArray()) {
return exclusions;
}
for (JsonNode item : excludeNode) {
if (!item.isTextual()) {
continue;
}
String typeName = item.asText();
try {
Class<?> clazz = ComponentNameResolver.resolveClass(
typeName, context.getClassLoader());
exclusions.add((Class<? extends T>) clazz);
LOG.debug("Excluding {} from SPI: {}", sectionName, typeName);
} catch (ClassNotFoundException e) {
LOG.warn("Unknown {} in exclude list: {}", sectionName, typeName);
}
}
return exclusions;
}
/**
* Configuration for the default marker (e.g., "default-parser").
*/
private record DefaultMarkerConfig<U>(
boolean present,
int index,
Set<Class<? extends U>> exclusions,
JsonNode configNode
) {}
/**
* Unwrap a component to get the underlying class for auto-exclusion purposes.
* When a component is wrapped in a decorator (e.g., ParserDecorator for mime filtering),
* we need to exclude the underlying class from SPI, not the decorator class.
* Subclasses can override this for type-specific unwrapping.
*/
@SuppressWarnings("unchecked")
protected Class<? extends T> unwrapClass(T component) {
return (Class<? extends T>) component.getClass();
}
// ==================== Accessors for subclasses ====================
protected String getSectionName() {
return sectionName;
}
protected String getDefaultMarkerName() {
return defaultMarkerName;
}
protected Class<T> getComponentClass() {
return componentClass;
}
}