TikaComponentProcessor.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.annotation;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.processing.AbstractProcessor;
import javax.annotation.processing.Filer;
import javax.annotation.processing.Messager;
import javax.annotation.processing.ProcessingEnvironment;
import javax.annotation.processing.RoundEnvironment;
import javax.annotation.processing.SupportedAnnotationTypes;
import javax.annotation.processing.SupportedSourceVersion;
import javax.lang.model.SourceVersion;
import javax.lang.model.element.AnnotationMirror;
import javax.lang.model.element.AnnotationValue;
import javax.lang.model.element.Element;
import javax.lang.model.element.ExecutableElement;
import javax.lang.model.element.TypeElement;
import javax.lang.model.type.DeclaredType;
import javax.lang.model.type.TypeMirror;
import javax.tools.Diagnostic;
import javax.tools.FileObject;
import javax.tools.StandardLocation;
import org.apache.tika.config.TikaComponent;
/**
 * Annotation processor for {@link TikaComponent} that generates:
 * <ul>
 *   <li>Standard Java SPI files (META-INF/services/*) for ServiceLoader</li>
 *   <li>Component index files (META-INF/tika/*.idx) for name-based lookup</li>
 * </ul>
 *
 * <p>The processor maintains an inclusion list of known Tika service interfaces
 * to avoid generating SPI files for utility interfaces like Serializable, etc.
 *
 * <p>Components are accumulated over all processing rounds and the files are
 * written once, in the final round. Class names are recorded as <em>binary</em>
 * names (e.g. {@code pkg.Outer$Inner}) because that is the form required in
 * ServiceLoader provider-configuration files.
 */
@SupportedAnnotationTypes("org.apache.tika.config.TikaComponent")
@SupportedSourceVersion(SourceVersion.RELEASE_17)
public class TikaComponentProcessor extends AbstractProcessor {

    /**
     * Known Tika service interfaces for SPI generation.
     * Only classes implementing these interfaces will have SPI files generated.
     * <p>
     * Components that don't implement any of these interfaces (e.g., DigesterFactory,
     * ContentHandlerFactory implementations) go to parse-context.idx instead.
     * These should specify their contextKey explicitly via @TikaComponent(contextKey=...).
     */
    private static final Map<String, String> SERVICE_INTERFACES = new LinkedHashMap<>();

    static {
        // Map interface fully qualified name -> index file name
        SERVICE_INTERFACES.put("org.apache.tika.parser.Parser", "parsers");
        SERVICE_INTERFACES.put("org.apache.tika.detect.Detector", "detectors");
        SERVICE_INTERFACES.put("org.apache.tika.detect.EncodingDetector", "encoding-detectors");
        SERVICE_INTERFACES.put("org.apache.tika.language.detect.LanguageDetector",
                "language-detectors");
        SERVICE_INTERFACES.put("org.apache.tika.language.translate.Translator", "translators");
        SERVICE_INTERFACES.put("org.apache.tika.renderer.Renderer", "renderers");
        SERVICE_INTERFACES.put("org.apache.tika.metadata.filter.MetadataFilter",
                "metadata-filters");
    }

    private Messager messager;
    private Filer filer;

    // Accumulate components across rounds
    // Map: service interface name -> set of implementing class (binary) names
    private final Map<String, Set<String>> spiServices = new HashMap<>();
    // Map: index file name -> map of (component name -> index entry value)
    private final Map<String, Map<String, String>> indexFiles = new HashMap<>();

    @Override
    public synchronized void init(ProcessingEnvironment processingEnv) {
        super.init(processingEnv);
        this.messager = processingEnv.getMessager();
        this.filer = processingEnv.getFiler();
    }

    /**
     * Collects annotated types in every round and writes the SPI and index
     * files in the final round.
     *
     * @param annotations the annotation types requested to be processed
     * @param roundEnv    environment for the current processing round
     * @return always {@code true}
     */
    @Override
    public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) {
        if (roundEnv.processingOver()) {
            // Final round - write accumulated data
            writeServiceFiles();
            writeIndexFiles();
            return true;
        }
        for (Element element : roundEnv.getElementsAnnotatedWith(TikaComponent.class)) {
            if (element instanceof TypeElement) {
                processComponent((TypeElement) element);
            }
        }
        return true;
    }

    /**
     * Registers a single annotated class in the SPI and index accumulators.
     *
     * @param element the class carrying {@code @TikaComponent}
     */
    private void processComponent(TypeElement element) {
        // Binary name, so that nested classes are written as Outer$Inner
        String className = binaryNameOf(element);
        TikaComponent annotation = element.getAnnotation(TikaComponent.class);

        // Determine component name; auto-generate from the simple class name if unset
        String componentName = annotation.name();
        if (componentName == null || componentName.isEmpty()) {
            componentName = KebabCaseConverter.toKebabCase(element.getSimpleName().toString());
        }

        // Check if component should be included in SPI
        boolean includeSpi = annotation.spi();
        // Class-typed attributes must be read through the mirror API
        String contextKey = getContextKeyFromAnnotation(element);
        String defaultFor = getDefaultForFromAnnotation(element);

        messager.printMessage(Diagnostic.Kind.NOTE,
                "Processing @TikaComponent: " + className + " -> " + componentName +
                        " (SPI: " + includeSpi + ", contextKey: " + contextKey +
                        ", defaultFor: " + defaultFor + ")");

        // Find all implemented service interfaces
        List<String> serviceInterfaces = findServiceInterfaces(element);
        String indexValue =
                buildIndexValue(className, contextKey, defaultFor, serviceInterfaces, element);

        // Components that don't implement any known service interface go to parse-context.idx
        // These should specify their contextKey explicitly via @TikaComponent(contextKey=...)
        if (serviceInterfaces.isEmpty()) {
            messager.printMessage(Diagnostic.Kind.NOTE,
                    "Class " + className + " is a parse-context component, " +
                            "adding to parse-context.idx", element);
            Map<String, String> index = indexFiles.computeIfAbsent("parse-context",
                    k -> new LinkedHashMap<>());
            addToIndex(index, componentName, indexValue, className, element);
        }

        // Process SPI service interfaces (these also get their own idx files)
        for (String serviceInterface : serviceInterfaces) {
            // Add to SPI services only if spi = true
            if (includeSpi) {
                spiServices.computeIfAbsent(serviceInterface, k -> new LinkedHashSet<>())
                        .add(className);
            }
            // Always add to index files for name-based lookup, regardless of spi value
            String indexFileName = SERVICE_INTERFACES.get(serviceInterface);
            if (indexFileName != null) {
                Map<String, String> index = indexFiles.computeIfAbsent(indexFileName,
                        k -> new LinkedHashMap<>());
                addToIndex(index, componentName, indexValue, className, element);
            }
        }
    }

    /**
     * Builds the index entry value: {@code className[:key=X][:default]}.
     * The key is the explicit contextKey if given, otherwise the single
     * implemented service interface; with several interfaces no key is
     * written and a warning is emitted.
     */
    private String buildIndexValue(String className, String contextKey, String defaultFor,
                                   List<String> serviceInterfaces, TypeElement element) {
        String indexValue = className;
        if (contextKey != null) {
            // Explicit contextKey specified
            indexValue = className + ":key=" + contextKey;
        } else if (serviceInterfaces.size() == 1) {
            // Auto-detect contextKey from single service interface
            indexValue = className + ":key=" + serviceInterfaces.get(0);
            messager.printMessage(Diagnostic.Kind.NOTE,
                    "Auto-detected contextKey=" + serviceInterfaces.get(0) + " for " + className);
        } else if (serviceInterfaces.size() > 1) {
            // Multiple interfaces - warn that contextKey should be specified
            messager.printMessage(Diagnostic.Kind.WARNING,
                    "Class " + className + " implements multiple interfaces: " +
                            serviceInterfaces +
                            ". Consider specifying @TikaComponent(contextKey=...) " +
                            "to select which one to use as ParseContext key.", element);
        }
        // Add :default marker if defaultFor is specified
        if (defaultFor != null) {
            indexValue = indexValue + ":default";
        }
        return indexValue;
    }

    /**
     * Returns the binary name of a type ({@code pkg.Outer$Inner} for nested types).
     * For top-level types this equals the qualified name.
     */
    private String binaryNameOf(TypeElement type) {
        return processingEnv.getElementUtils().getBinaryName(type).toString();
    }

    /**
     * Adds an entry to an index, checking for duplicates.
     * A second registration under the same component name is ignored if it comes
     * from the same class, and reported as an error if it comes from another class.
     */
    private void addToIndex(Map<String, String> index, String componentName,
                            String indexValue, String className, TypeElement element) {
        String existingValue = index.get(componentName);
        if (existingValue == null) {
            index.put(componentName, indexValue);
            return;
        }
        // Extract class name from value (may have :key= / :default suffixes)
        int separator = existingValue.indexOf(':');
        String existingClass =
                separator >= 0 ? existingValue.substring(0, separator) : existingValue;
        if (!existingClass.equals(className)) {
            messager.printMessage(Diagnostic.Kind.ERROR,
                    "Duplicate component name '" + componentName + "' for classes: " +
                            existingClass + " and " + className, element);
        }
    }

    /**
     * Gets the contextKey value from the annotation using the mirror API.
     * Returns null if contextKey is void.class (the default).
     */
    private String getContextKeyFromAnnotation(TypeElement element) {
        return getClassAttributeFromAnnotation(element, "contextKey");
    }

    /**
     * Gets the defaultFor value from the annotation using the mirror API.
     * Returns null if defaultFor is void.class (the default).
     */
    private String getDefaultForFromAnnotation(TypeElement element) {
        return getClassAttributeFromAnnotation(element, "defaultFor");
    }

    /**
     * Gets a Class-typed attribute value from the annotation using the mirror API.
     * Returns null if the attribute is absent or is void.class (the default).
     * Declared class/interface types are returned as binary names.
     *
     * @param element       the annotated class
     * @param attributeName simple name of the annotation attribute to read
     * @return the referenced type name, or {@code null} if not specified
     */
    private String getClassAttributeFromAnnotation(TypeElement element, String attributeName) {
        for (AnnotationMirror mirror : element.getAnnotationMirrors()) {
            if (!isTikaComponentAnnotation(mirror.getAnnotationType())) {
                continue;
            }
            // Only explicitly written attributes appear in getElementValues()
            for (Map.Entry<? extends ExecutableElement, ? extends AnnotationValue> entry
                    : mirror.getElementValues().entrySet()) {
                if (!entry.getKey().getSimpleName().contentEquals(attributeName)) {
                    continue;
                }
                // The value is a TypeMirror for Class types
                Object value = entry.getValue().getValue();
                if (!(value instanceof TypeMirror)) {
                    continue;
                }
                String typeName = value.toString();
                // void.class is the default, meaning "not specified"
                if ("void".equals(typeName) || "java.lang.Void".equals(typeName)) {
                    continue;
                }
                TypeMirror classType = (TypeMirror) value;
                if (classType.getKind() == javax.lang.model.type.TypeKind.DECLARED) {
                    Element target = ((DeclaredType) classType).asElement();
                    if (target instanceof TypeElement) {
                        return binaryNameOf((TypeElement) target);
                    }
                }
                return typeName;
            }
        }
        return null;
    }

    /**
     * Tells whether the given annotation type is {@link TikaComponent}, comparing
     * by the qualified name of the annotation's declaring element.
     */
    private boolean isTikaComponentAnnotation(DeclaredType annotationType) {
        Element annotationElement = annotationType.asElement();
        return annotationElement instanceof TypeElement
                && ((TypeElement) annotationElement).getQualifiedName()
                        .contentEquals(TikaComponent.class.getCanonicalName());
    }

    /**
     * Finds all Tika service interfaces implemented by the given type element.
     */
    private List<String> findServiceInterfaces(TypeElement element) {
        List<String> result = new ArrayList<>();
        Set<String> visited = new LinkedHashSet<>();
        findInterfacesRecursive(element.asType(), result, visited, SERVICE_INTERFACES.keySet());
        return result;
    }

    /**
     * Recursively searches for interfaces in the type hierarchy.
     *
     * @param type the type to search from
     * @param result list to add found interfaces to
     * @param visited set of already visited types (to avoid infinite loops)
     * @param targetInterfaces the set of interface names to look for
     */
    private void findInterfacesRecursive(TypeMirror type, List<String> result,
                                         Set<String> visited, Set<String> targetInterfaces) {
        // instanceof is false for null, and for the non-declared "no superclass" type
        if (!(type instanceof DeclaredType)) {
            return;
        }
        TypeElement typeElement = (TypeElement) ((DeclaredType) type).asElement();
        String typeName = typeElement.getQualifiedName().toString();
        // Avoid infinite loops
        if (!visited.add(typeName)) {
            return;
        }
        // Check if this is a target interface
        if (targetInterfaces.contains(typeName) && !result.contains(typeName)) {
            result.add(typeName);
        }
        // Check superclass
        findInterfacesRecursive(typeElement.getSuperclass(), result, visited, targetInterfaces);
        // Check interfaces
        for (TypeMirror interfaceType : typeElement.getInterfaces()) {
            findInterfacesRecursive(interfaceType, result, visited, targetInterfaces);
        }
    }

    /**
     * Writes META-INF/services files for Java SPI, one per service interface,
     * listing the implementing classes in alphabetical order.
     * I/O failures are reported through the {@link Messager} as errors.
     */
    private void writeServiceFiles() {
        for (Map.Entry<String, Set<String>> entry : spiServices.entrySet()) {
            String serviceInterface = entry.getKey();
            Set<String> implementations = entry.getValue();
            // Sort implementations alphabetically for deterministic output
            List<String> sortedImplementations = new ArrayList<>(implementations);
            Collections.sort(sortedImplementations);
            try {
                FileObject file = filer.createResource(StandardLocation.CLASS_OUTPUT, "",
                        "META-INF/services/" + serviceInterface);
                try (Writer writer = file.openWriter()) {
                    writeApacheLicenseHeader(writer);
                    writer.write("\n\n");
                    writer.write("# Generated by TikaComponentProcessor\n");
                    writer.write("# Do not edit manually\n");
                    for (String impl : sortedImplementations) {
                        writer.write(impl);
                        writer.write("\n");
                    }
                }
                messager.printMessage(Diagnostic.Kind.NOTE,
                        "Generated SPI file: META-INF/services/" + serviceInterface +
                                " with " + implementations.size() + " implementations");
            } catch (IOException e) {
                messager.printMessage(Diagnostic.Kind.ERROR,
                        "Failed to write SPI file for " + serviceInterface + ": " +
                                e.getMessage());
            }
        }
    }

    /**
     * Writes META-INF/tika/*.idx files for name-based component lookup.
     * Entries are written sorted by component name so the output does not depend
     * on the order in which the compiler reported the annotated classes.
     * I/O failures are reported through the {@link Messager} as errors.
     */
    private void writeIndexFiles() {
        for (Map.Entry<String, Map<String, String>> entry : indexFiles.entrySet()) {
            String fileName = entry.getKey();
            Map<String, String> components = entry.getValue();
            // Sort entries by component name for deterministic output
            List<Map.Entry<String, String>> sortedComponents =
                    new ArrayList<>(components.entrySet());
            sortedComponents.sort(Map.Entry.comparingByKey());
            try {
                FileObject file = filer.createResource(StandardLocation.CLASS_OUTPUT, "",
                        "META-INF/tika/" + fileName + ".idx");
                try (Writer writer = file.openWriter()) {
                    writeApacheLicenseHeader(writer);
                    writer.write("# Generated by TikaComponentProcessor\n");
                    writer.write("# Do not edit manually\n");
                    writer.write("# Format: component-name=fully.qualified.ClassName"
                            + "[:key=contextKeyClass][:default]\n");
                    for (Map.Entry<String, String> component : sortedComponents) {
                        writer.write(component.getKey());
                        writer.write("=");
                        writer.write(component.getValue());
                        writer.write("\n");
                    }
                }
                messager.printMessage(Diagnostic.Kind.NOTE,
                        "Generated index file: META-INF/tika/" + fileName + ".idx" +
                                " with " + components.size() + " components");
            } catch (IOException e) {
                messager.printMessage(Diagnostic.Kind.ERROR,
                        "Failed to write index file " + fileName + ": " + e.getMessage());
            }
        }
    }

    /**
     * Writes the Apache License 2.0 header, as '#'-prefixed comment lines, to a file.
     *
     * @param writer destination to write the header to
     * @throws IOException if the underlying writer fails
     */
    private void writeApacheLicenseHeader(Writer writer) throws IOException {
        String header = """
                # Licensed to the Apache Software Foundation (ASF) under one or more
                # contributor license agreements. See the NOTICE file distributed with
                # this work for additional information regarding copyright ownership.
                # The ASF licenses this file to You under the Apache License, Version 2.0
                # (the "License"); you may not use this file except in compliance with
                # the License. You may obtain a copy of the License at
                #
                # http://www.apache.org/licenses/LICENSE-2.0
                #
                # Unless required by applicable law or agreed to in writing, software
                # distributed under the License is distributed on an "AS IS" BASIS,
                # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
                # See the License for the specific language governing permissions and
                # limitations under the License.
                """;
        writer.write(header);
    }
}