ComponentRegistry.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config.loader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.tika.config.SelfConfiguring;
import org.apache.tika.exception.TikaConfigException;
/**
* Registry for looking up Tika component classes by name.
* Loads component name-to-class mappings from META-INF/tika/*.idx files
* generated by the {@code @TikaComponent} annotation processor.
* <p>
* The registry tracks:
* <ul>
* <li>Component class</li>
* <li>Whether the component is self-configuring (implements {@link SelfConfiguring})</li>
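* <li>Optional explicit context key for ParseContext</li>
* <li>Whether the component is marked as a default (the {@code :default} suffix in the index file)</li>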
* </ul>
* <p>
* Modules that can't use @TikaComponent (due to dependency constraints) can provide
* their own META-INF/tika/*.idx files to register components.
*/
public class ComponentRegistry {

    /**
     * Built-in aliases for external dependencies.
     * Maps component names to fully qualified class names.
     * <p>
     * NOTE(review): nothing in this class reads this map; components are registered only
     * through the index files. Confirm whether it should be applied during loading or removed.
     */
    private static final Map<String, String> BUILTIN_ALIASES = createBuiltinAliases();

    private static Map<String, String> createBuiltinAliases() {
        Map<String, String> aliases = new HashMap<>();
        // UnpackConfig is in tika-pipes-core which can't depend on tika-core for @TikaComponent
        aliases.put("unpack-config",
                "org.apache.tika.pipes.core.extractor.UnpackConfig");
        return Collections.unmodifiableMap(aliases);
    }

    /** Friendly component name to component info, in the order entries were first read. */
    private final Map<String, ComponentInfo> components;

    /** Reverse lookup: fully qualified class name to friendly component name. */
    private final Map<String, String> classNameToFriendlyName;

    private final ClassLoader classLoader;

    /**
     * Creates a component registry by loading the specified index file.
     * Every resource named {@code META-INF/tika/<indexFileName>.idx} that the class loader
     * can see is read; a later entry with the same name replaces an earlier one.
     *
     * @param indexFileName the index file name (e.g., "parsers", "detectors")
     *                      without the .idx extension
     * @param classLoader the class loader to use for loading classes; must not be null
     * @throws TikaConfigException if the index file cannot be found, read, or parsed, or if a
     *                             listed class cannot be loaded
     * @throws NullPointerException if {@code classLoader} is null
     */
    public ComponentRegistry(String indexFileName, ClassLoader classLoader)
            throws TikaConfigException {
        // Fail fast with a clear message instead of an anonymous NPE deep inside loading.
        if (classLoader == null) {
            throw new NullPointerException("classLoader must not be null");
        }
        this.classLoader = classLoader;
        this.components = loadComponents(indexFileName);

        // Build reverse lookup by class name (not Class object) to handle classloader
        // differences. If several names point at the same class, the last one read wins.
        this.classNameToFriendlyName = new HashMap<>();
        for (Map.Entry<String, ComponentInfo> entry : components.entrySet()) {
            classNameToFriendlyName.put(entry.getValue().componentClass().getName(),
                    entry.getKey());
        }
    }

    /**
     * Looks up a component class by name.
     *
     * @param name the component name (e.g., "pdf-parser")
     * @return the component class
     * @throws TikaConfigException if the component name is not found
     */
    public Class<?> getComponentClass(String name) throws TikaConfigException {
        return getComponentInfo(name).componentClass();
    }

    /**
     * Looks up full component information by name.
     *
     * @param name the component name (e.g., "pdf-parser")
     * @return the component info including class, selfConfiguring flag, and contextKey
     * @throws TikaConfigException if the component name is not found; the message lists the
     *                             names that are available
     */
    public ComponentInfo getComponentInfo(String name) throws TikaConfigException {
        ComponentInfo info = components.get(name);
        if (info == null) {
            throw new TikaConfigException("Unknown component name: '" + name + "'. " +
                    "Available components: " + components.keySet());
        }
        return info;
    }

    /**
     * Returns all registered components.
     *
     * @return unmodifiable map of component names to component info
     */
    public Map<String, ComponentInfo> getAllComponents() {
        return Collections.unmodifiableMap(components);
    }

    /**
     * Returns all components marked as defaults.
     *
     * @return unmodifiable map of component names to component info for default
     *         implementations, in registration order
     */
    public Map<String, ComponentInfo> getDefaultComponents() {
        Map<String, ComponentInfo> defaults = new LinkedHashMap<>();
        for (Map.Entry<String, ComponentInfo> entry : components.entrySet()) {
            if (entry.getValue().isDefault()) {
                defaults.put(entry.getKey(), entry.getValue());
            }
        }
        return Collections.unmodifiableMap(defaults);
    }

    /**
     * Checks if a component with the given name is registered.
     *
     * @param name the component name
     * @return true if the component is registered
     */
    public boolean hasComponent(String name) {
        return components.containsKey(name);
    }

    /**
     * Looks up a component's friendly name by its class.
     * Uses class name (not Class object) for lookup to handle classloader differences.
     *
     * @param clazz the component class
     * @return the friendly name, or null if not registered
     */
    public String getFriendlyName(Class<?> clazz) {
        return classNameToFriendlyName.get(clazz.getName());
    }

    /**
     * Reads every {@code META-INF/tika/<indexFileName>.idx} resource visible to the class
     * loader and merges the entries into one map.
     *
     * @param indexFileName the index file name without the .idx extension
     * @return component name to component info, in the order names were first encountered
     * @throws TikaConfigException if no such resource exists or a resource cannot be loaded
     */
    private Map<String, ComponentInfo> loadComponents(String indexFileName)
            throws TikaConfigException {
        Map<String, ComponentInfo> result = new LinkedHashMap<>();
        String resourcePath = "META-INF/tika/" + indexFileName + ".idx";
        try {
            Enumeration<URL> resources = classLoader.getResources(resourcePath);
            if (!resources.hasMoreElements()) {
                throw new TikaConfigException("Component index file not found: " + resourcePath);
            }
            while (resources.hasMoreElements()) {
                loadFromUrl(resources.nextElement(), result);
            }
        } catch (IOException e) {
            throw new TikaConfigException("Failed to load component index: " + resourcePath, e);
        }
        return result;
    }

    /**
     * Reads one index file (UTF-8) and registers each entry it contains.
     * Blank lines and lines starting with {@code #} are ignored.
     *
     * @param url location of the index file
     * @param result map that receives the parsed entries
     * @throws TikaConfigException if the file cannot be read or an entry is invalid
     */
    private void loadFromUrl(URL url, Map<String, ComponentInfo> result)
            throws TikaConfigException {
        try (InputStream in = url.openStream();
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                line = line.trim();
                // Skip comments and empty lines
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }
                parseEntry(line, url, lineNumber, result);
            }
        } catch (IOException e) {
            throw new TikaConfigException("Failed to read component index from: " + url, e);
        }
    }

    /**
     * Parses one entry of the form
     * {@code component-name=fully.qualified.ClassName[:key=contextKeyClass][:default]}
     * and stores the resulting component under its name.
     * Whitespace around the name, the class name, and each suffix is ignored; empty
     * suffixes are skipped. If {@code key=} appears more than once the last one is used.
     *
     * @param line the trimmed, non-empty, non-comment line
     * @param url the index file the line came from (used in error messages)
     * @param lineNumber 1-based line number within the index file
     * @param result map that receives the parsed entry
     * @throws TikaConfigException if the line is malformed or a class cannot be loaded
     */
    private void parseEntry(String line, URL url, int lineNumber,
                            Map<String, ComponentInfo> result) throws TikaConfigException {
        String errorPrefix = "Invalid index file format at " + url + " line " + lineNumber;

        int equalsIndex = line.indexOf('=');
        if (equalsIndex == -1) {
            throw new TikaConfigException(errorPrefix + ": expected 'name=class', got: " + line);
        }
        String name = line.substring(0, equalsIndex).trim();
        String value = line.substring(equalsIndex + 1).trim();

        // First segment is the class name, the rest are suffixes. The -1 limit keeps
        // trailing empty segments so they are skipped explicitly below.
        String[] segments = value.split(":", -1);
        String className = segments[0].trim();
        if (name.isEmpty() || className.isEmpty()) {
            throw new TikaConfigException(errorPrefix + ": name or class is empty");
        }

        String contextKeyClassName = null;
        boolean isDefault = false;
        for (int i = 1; i < segments.length; i++) {
            String suffix = segments[i].trim();
            if (suffix.isEmpty()) {
                continue;
            }
            if (suffix.startsWith("key=")) {
                contextKeyClassName = suffix.substring(4).trim();
                if (contextKeyClassName.isEmpty()) {
                    // Report a format problem here rather than a confusing
                    // "class not found" for an empty class name later on.
                    throw new TikaConfigException(
                            errorPrefix + ": 'key=' suffix has no class name");
                }
            } else if (suffix.equals("default")) {
                isDefault = true;
            } else {
                throw new TikaConfigException(errorPrefix +
                        ": unknown suffix '" + suffix + "', expected 'key=...' or 'default'");
            }
        }

        result.put(name, createComponentInfo(className, contextKeyClassName, isDefault, url));
    }

    /**
     * Loads the component class (and the context key class, if one was given) and wraps
     * them in a {@link ComponentInfo}. The component class is loaded first.
     *
     * @param className fully qualified name of the component class
     * @param contextKeyClassName fully qualified name of the context key class, or null
     * @param isDefault whether the entry carried the {@code default} suffix
     * @param url the index file the entry came from (used in error messages)
     * @return the component info for the entry
     * @throws TikaConfigException if either class cannot be found
     */
    private ComponentInfo createComponentInfo(String className, String contextKeyClassName,
                                              boolean isDefault, URL url)
            throws TikaConfigException {
        Class<?> clazz;
        try {
            clazz = classLoader.loadClass(className);
        } catch (ClassNotFoundException e) {
            throw new TikaConfigException(
                    "Component class not found: " + className + " (from " + url + ")", e);
        }

        Class<?> contextKey = null;
        if (contextKeyClassName != null) {
            try {
                contextKey = classLoader.loadClass(contextKeyClassName);
            } catch (ClassNotFoundException e) {
                throw new TikaConfigException(
                        "Context key class not found: " + contextKeyClassName +
                                " (from " + url + ")", e);
            }
        }

        boolean selfConfiguring = SelfConfiguring.class.isAssignableFrom(clazz);
        return new ComponentInfo(clazz, selfConfiguring, contextKey, isDefault);
    }
}