DigesterFactory.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.digest;
/**
 * Contract for factories that produce {@link Digester} instances.
 * <p>
 * An implementation is expected to carry the {@code @TikaComponent} annotation
 * and to expose its configuration (for example, the list of digests) as bean
 * properties.
 * <p>
 * The factory is configured under the "parse-context" section of
 * tika-config.json. Once loaded, it is placed into the ParseContext, where
 * AutoDetectParser picks it up during parsing in order to compute digests.
 * <p>
 * Sample JSON configuration:
 * <pre>{@code
 * {
 *   "parse-context": {
 *     "commons-digester-factory": {
 *       "digests": [
 *         { "algorithm": "MD5" },
 *         { "algorithm": "SHA256", "encoding": "BASE32" }
 *       ],
 *       "skipContainerDocumentDigest": true
 *     }
 *   }
 * }
 * }</pre>
 * <p>
 * With TikaLoader, {@code loader.loadParseContext()} returns a ParseContext
 * in which the DigesterFactory has already been set.
 *
 * @see DigestDef
 */
public interface DigesterFactory {

    /**
     * Creates a fresh {@link Digester} from the properties configured on this
     * factory.
     *
     * @return a newly built Digester instance
     */
    Digester build();

    /**
     * Tells whether the container (top-level) document should be left out of
     * digesting. If this returns {@code true}, only embedded documents
     * ({@code depth > 0}) are digested.
     * <p>
     * Unless an implementation overrides it, this method returns
     * {@code false}, meaning every document is digested.
     *
     * @return {@code true} when container documents are to be skipped,
     *         {@code false} otherwise
     */
    default boolean isSkipContainerDocumentDigest() {
        return false;
    }
}