MyCustomImageGraphicsEngineFactory.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.pdf;

import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDPage;

import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.image.ImageGraphicsEngine;
import org.apache.tika.parser.pdf.image.ImageGraphicsEngineFactory;
import org.apache.tika.sax.XHTMLContentHandler;

/**
 * Sample {@link ImageGraphicsEngineFactory} subclass showing how a user-supplied factory
 * can carry its own configuration parameter ({@code customParam}).
 * <p>
 * <b>JSON config file usage:</b> refer to the factory by its class name string:
 * <pre>
 * {
 *   "pdf-parser": {
 *     "imageGraphicsEngineFactoryClass": "com.example.MyCustomFactory"
 *   }
 * }
 * </pre>
 * Note: custom parameters cannot be supplied this way; the factory runs with its default values.
 * <p>
 * <b>ParseContext serialization:</b> the {@code @JsonTypeInfo} annotation turns on polymorphic
 * serialization when tika-serialization's polymorphic ObjectMapper is used (e.g., for a
 * ParseContext round trip). Full polymorphic support needs the annotation on the base class
 * as well as on this subclass.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "@class")
public class MyCustomImageGraphicsEngineFactory extends ImageGraphicsEngineFactory {

    /**
     * Metadata key whose presence marks that this custom factory took part in the parse.
     */
    public static final String CUSTOM_FACTORY_USED = "X-CustomGraphicsEngineFactory-Used";

    /**
     * Metadata key under which the current {@code customParam} value is stored.
     */
    public static final String CUSTOM_PARAM_KEY = "X-CustomGraphicsEngineFactory-CustomParam";

    /**
     * Value {@code customParam} holds until {@link #setCustomParam(String)} is called.
     */
    private static final String DEFAULT_CUSTOM_PARAM = "default";

    private String customParam = DEFAULT_CUSTOM_PARAM;

    /**
     * No-arg constructor; needed so Jackson can instantiate the factory when deserializing.
     */
    public MyCustomImageGraphicsEngineFactory() {
        super();
    }

    /**
     * Returns the custom parameter currently configured on this factory.
     *
     * @return the configured value, {@code "default"} unless it has been changed
     */
    public String getCustomParam() {
        return this.customParam;
    }

    /**
     * Replaces the custom parameter configured on this factory.
     *
     * @param customParam the new value; stored as given, without validation
     */
    public void setCustomParam(String customParam) {
        this.customParam = customParam;
    }

    /**
     * Writes the two marker entries into the parent metadata, then hands every argument
     * unchanged to the superclass implementation and returns whatever it produces.
     *
     * @param page                      forwarded to the superclass
     * @param pageNumber                forwarded to the superclass
     * @param embeddedDocumentExtractor forwarded to the superclass
     * @param pdfParserConfig           forwarded to the superclass
     * @param processedInlineImages     forwarded to the superclass
     * @param imageCounter              forwarded to the superclass
     * @param xhtml                     forwarded to the superclass
     * @param parentMetadata            receives {@link #CUSTOM_FACTORY_USED} and
     *                                  {@link #CUSTOM_PARAM_KEY}, then is forwarded
     * @param parseContext              forwarded to the superclass
     * @return the engine created by the superclass implementation
     */
    @Override
    public ImageGraphicsEngine newEngine(PDPage page,
                                         int pageNumber,
                                         EmbeddedDocumentExtractor embeddedDocumentExtractor,
                                         PDFParserConfig pdfParserConfig,
                                         Map<COSStream, Integer> processedInlineImages,
                                         AtomicInteger imageCounter, XHTMLContentHandler xhtml,
                                         Metadata parentMetadata, ParseContext parseContext) {
        // Stamp the metadata first so the markers are in place before the engine is built.
        markFactoryUsage(parentMetadata);

        // The engine itself comes from the stock implementation.
        ImageGraphicsEngine defaultEngine = super.newEngine(
                page,
                pageNumber,
                embeddedDocumentExtractor,
                pdfParserConfig,
                processedInlineImages,
                imageCounter,
                xhtml,
                parentMetadata,
                parseContext);
        return defaultEngine;
    }

    /**
     * Records in {@code target} that this factory was used and which custom parameter it holds.
     */
    private void markFactoryUsage(Metadata target) {
        target.set(CUSTOM_FACTORY_USED, "true");
        target.set(CUSTOM_PARAM_KEY, this.customParam);
    }
}