// ConfigDeserializer.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.serialization;

import java.io.IOException;

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.tika.config.JsonConfig;
import org.apache.tika.config.loader.JsonMergeUtils;
import org.apache.tika.config.loader.TikaObjectMapperFactory;
import org.apache.tika.parser.ParseContext;

/**
 * Helper utility for {@link org.apache.tika.config.SelfConfiguring} components
 * to deserialize their configuration from ParseContext at run time.
 * <p>
 * <strong>Note for Parser Developers:</strong> Instead of calling this class directly,
 * use {@link org.apache.tika.config.ParseContextConfig} which provides the same functionality
 * but with better error handling. ParseContextConfig will throw a clear exception if
 * tika-serialization is not on the classpath.
 * <p>
 * This allows parsers to retrieve their configuration using the same friendly names
 * as in tika-config.json (e.g., "pdf-parser", "html-parser") from per-request
 * configurations sent via FetchEmitTuple or other serialization mechanisms.
 * <p>
 * The helper automatically merges user configuration with parser defaults, eliminating
 * the need for config-specific cloneAndUpdate methods.
 * <p>
 * Example usage in a parser:
 * <pre>
 * // Recommended: Use ParseContextConfig wrapper (in tika-core)
 * PDFParserConfig localConfig = ParseContextConfig.getConfig(
 *     context, "pdf-parser", PDFParserConfig.class, defaultConfig);
 * </pre>
 *
 * @see org.apache.tika.config.ParseContextConfig
 */
public class ConfigDeserializer {

    /** Shared Jackson mapper obtained once from {@link TikaObjectMapperFactory}. */
    private static final ObjectMapper JSON_MAPPER = TikaObjectMapperFactory.getMapper();

    /**
     * Looks up the configuration registered under {@code configKey} in the given
     * {@link ParseContext}, deserializing it on first access.
     * <p>
     * The lookup proceeds as follows:
     * <ol>
     *   <li>With a {@code null} context there is nothing to look up, so
     *       {@code defaultConfig} is returned as-is.</li>
     *   <li>If the context already holds a non-null resolved config for the key,
     *       that object is returned and {@code defaultConfig} is not consulted.</li>
     *   <li>Otherwise, if the context holds a {@link JsonConfig} for the key, its JSON is
     *       passed to {@link JsonMergeUtils#mergeWithDefaults} together with
     *       {@code configClass} and {@code defaultConfig}. The result is stored back in
     *       the context twice: under {@code configKey} as a resolved config, and under
     *       {@code configClass} so that {@code parseContext.get(configClass)} finds it.</li>
     *   <li>If neither is present, {@code defaultConfig} is returned and the context
     *       is left unmodified.</li>
     * </ol>
     *
     * @param context       the parse context to query; may be {@code null}
     * @param configKey     the configuration key (e.g., "pdf-parser", "handler-config")
     * @param configClass   the type the JSON should be deserialized into
     * @param defaultConfig baseline config handed to the merge step; may be {@code null}
     * @param <T>           the configuration type
     * @return the previously resolved config, the freshly merged config, or
     *         {@code defaultConfig} (possibly {@code null}) when the context has no entry
     * @throws IOException if the deserialize/merge step fails
     */
    @SuppressWarnings("unchecked")
    public static <T> T getConfig(ParseContext context, String configKey, Class<T> configClass, T defaultConfig)
            throws IOException {
        // Start from the fallback; it is only replaced when the context supplies something.
        T result = defaultConfig;

        if (context != null) {
            T alreadyResolved = context.getResolvedConfig(configKey);
            if (alreadyResolved != null) {
                // Cache hit: reuse the object produced by an earlier call.
                result = alreadyResolved;
            } else {
                JsonConfig rawJson = context.getJsonConfig(configKey);
                if (rawJson != null) {
                    result = JsonMergeUtils.mergeWithDefaults(
                            JSON_MAPPER, rawJson.json(), configClass, defaultConfig);

                    // Remember the result by key so later calls skip deserialization ...
                    context.setResolvedConfig(configKey, result);
                    // ... and by class so other components can use parseContext.get(configClass).
                    context.set(configClass, result);
                }
            }
        }

        return result;
    }

    /**
     * Convenience overload of
     * {@link #getConfig(ParseContext, String, Class, Object)} that supplies no default
     * config ({@code null} is passed as the default).
     *
     * @param context     the parse context to query; may be {@code null}
     * @param configKey   the configuration key (e.g., "pdf-parser", "handler-config")
     * @param configClass the type the JSON should be deserialized into
     * @param <T>         the configuration type
     * @return the resolved configuration, or {@code null} when the context has none
     * @throws IOException if the deserialize/merge step fails
     */
    public static <T> T getConfig(ParseContext context, String configKey, Class<T> configClass)
            throws IOException {
        final T noDefault = null;
        return getConfig(context, configKey, configClass, noDefault);
    }

    /**
     * Reports whether the ParseContext carries a configuration for {@code configKey}.
     *
     * @param context   the parse context to query; may be {@code null}
     * @param configKey the configuration key to look for
     * @return {@code true} when the context is non-null and either has a JSON config for
     *         the key or holds a non-null resolved config for it; {@code false} otherwise
     */
    public static boolean hasConfig(ParseContext context, String configKey) {
        // The JSON check comes first; the resolved lookup only runs when it fails.
        return context != null
                && (context.hasJsonConfig(configKey) || context.getResolvedConfig(configKey) != null);
    }
}