FetcherManager.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.core.fetcher;
import java.io.IOException;
import java.util.Map;
import com.fasterxml.jackson.databind.JsonNode;
import org.pf4j.PluginManager;
import org.apache.tika.config.loader.TikaJsonConfig;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.pipes.api.fetcher.Fetcher;
import org.apache.tika.pipes.api.fetcher.FetcherFactory;
import org.apache.tika.pipes.api.fetcher.FetcherNotFoundException;
import org.apache.tika.pipes.core.AbstractComponentManager;
import org.apache.tika.plugins.ExtensionConfig;
/**
* Utility class to hold multiple fetchers.
* <p>
* This forbids multiple fetchers with the same pluginId.
* Fetchers are instantiated lazily on first use.
*/
public class FetcherManager extends AbstractComponentManager<Fetcher, FetcherFactory> {
private static final String CONFIG_KEY = "fetchers";
/**
* Loads a FetcherManager without allowing runtime modifications.
* Use {@link #load(PluginManager, TikaJsonConfig, boolean)} to enable runtime fetcher additions.
*
* @param pluginManager the plugin manager
* @param tikaJsonConfig the configuration
* @return a FetcherManager that does not allow runtime modifications
*/
public static FetcherManager load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig)
throws TikaConfigException, IOException {
return load(pluginManager, tikaJsonConfig, false);
}
/**
* Loads a FetcherManager with optional support for runtime modifications.
*
* @param pluginManager the plugin manager
* @param tikaJsonConfig the configuration
* @param allowRuntimeModifications if true, allows calling {@link #saveFetcher(ExtensionConfig)}
* to add fetchers at runtime
* @return a FetcherManager
*/
public static FetcherManager load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig,
boolean allowRuntimeModifications)
throws TikaConfigException, IOException {
return load(pluginManager, tikaJsonConfig, allowRuntimeModifications, null);
}
/**
* Loads a FetcherManager with optional support for runtime modifications and a custom config store.
*
* @param pluginManager the plugin manager
* @param tikaJsonConfig the configuration
* @param allowRuntimeModifications if true, allows calling {@link #saveFetcher(ExtensionConfig)}
* to add fetchers at runtime
* @param configStore custom config store implementation, or null to use default in-memory store
* @return a FetcherManager
*/
public static FetcherManager load(PluginManager pluginManager, TikaJsonConfig tikaJsonConfig,
boolean allowRuntimeModifications,
org.apache.tika.pipes.core.config.ConfigStore configStore)
throws TikaConfigException, IOException {
FetcherManager manager = new FetcherManager(pluginManager, allowRuntimeModifications);
JsonNode fetchersNode = tikaJsonConfig.getRootNode().get(CONFIG_KEY);
// Validate configuration and collect fetcher configs without instantiating
Map<String, ExtensionConfig> configs = manager.validateAndCollectConfigs(pluginManager, fetchersNode);
if (configStore != null) {
return new FetcherManager(pluginManager, configs, allowRuntimeModifications, configStore);
}
return new FetcherManager(pluginManager, configs, allowRuntimeModifications);
}
private FetcherManager(PluginManager pluginManager, boolean allowRuntimeModifications) {
super(pluginManager, Map.of(), allowRuntimeModifications);
}
private FetcherManager(PluginManager pluginManager, Map<String, ExtensionConfig> fetcherConfigs,
boolean allowRuntimeModifications) {
super(pluginManager, fetcherConfigs, allowRuntimeModifications);
}
private FetcherManager(PluginManager pluginManager, Map<String, ExtensionConfig> fetcherConfigs,
boolean allowRuntimeModifications,
org.apache.tika.pipes.core.config.ConfigStore configStore) {
super(pluginManager, fetcherConfigs, allowRuntimeModifications, configStore);
}
@Override
protected String getConfigKey() {
return CONFIG_KEY;
}
@Override
protected Class<FetcherFactory> getFactoryClass() {
return FetcherFactory.class;
}
@Override
protected String getComponentName() {
return "fetcher";
}
@Override
protected TikaException createNotFoundException(String message) {
return new FetcherNotFoundException(message);
}
/**
* Gets a fetcher by ID, lazily instantiating it if needed.
*
* @param id the fetcher ID
* @return the fetcher
* @throws FetcherNotFoundException if no fetcher with the given ID exists
* @throws IOException if there's an error building the fetcher
* @throws TikaException if there's a configuration error
*/
public Fetcher getFetcher(String id) throws IOException, TikaException {
return getComponent(id);
}
/**
* Convenience method that returns a fetcher if only one fetcher
* is configured. If 0 or > 1 fetchers are configured, this throws an IllegalArgumentException.
*
* @return the single configured fetcher
* @throws IOException if there's an error building the fetcher
* @throws TikaException if there's a configuration error
*/
public Fetcher getFetcher() throws IOException, TikaException {
return getComponent();
}
/**
* Dynamically adds or updates a fetcher configuration at runtime.
* The fetcher will not be instantiated until it is first requested via {@link #getFetcher(String)}.
* This allows for dynamic configuration without the overhead of immediate instantiation.
* If a fetcher with the same ID already exists, it will be replaced and the cached instance cleared.
* <p>
* This method is only available if the FetcherManager was loaded with
* {@link #load(PluginManager, TikaJsonConfig, boolean)} with allowRuntimeModifications=true.
* <p>
* Only authorized/authenticated users should be allowed to modify fetchers. BE CAREFUL.
*
* @param config the extension configuration for the fetcher
* @throws TikaConfigException if the fetcher type is unknown or if runtime modifications are not allowed
* @throws IOException if there is an error accessing the plugin manager
*/
public void saveFetcher(ExtensionConfig config) throws TikaConfigException, IOException {
saveComponent(config);
}
/**
* Deletes a fetcher configuration by ID.
*
* @param fetcherId the fetcher ID to delete
* @throws TikaConfigException if runtime modifications are not allowed or fetcher not found
*/
public void deleteFetcher(String fetcherId) throws TikaConfigException {
deleteComponent(fetcherId);
}
/**
* Gets the configuration for a specific fetcher by ID.
*
* @param fetcherId the fetcher ID
* @return the fetcher configuration, or null if not found
*/
public ExtensionConfig getConfig(String fetcherId) {
return getComponentConfig(fetcherId);
}
}