// ContentHandlerFactory.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.sax;
import java.io.Serializable;
import org.xml.sax.ContentHandler;
/**
 * Factory contract for producing {@link ContentHandler} instances.
 * <p>
 * tika-pipes, RecursiveParserWrapper, and other components that extract
 * content in memory obtain their content handlers through this base interface.
 * Because the interface extends {@link Serializable}, every implementation is
 * a {@code Serializable} type as well.
 * <p>
 * When output must be streamed to an OutputStream, see
 * {@link StreamingContentHandlerFactory} instead.
 *
 * @see StreamingContentHandlerFactory
 * @see BasicContentHandlerFactory
 */
public interface ContentHandlerFactory extends Serializable {

    /**
     * Builds a fresh ContentHandler to be used for content extraction.
     *
     * @return a newly created ContentHandler instance
     */
    ContentHandler createHandler();

    /**
     * Reports the name of the handler type this factory produces, for example
     * {@code TEXT}, {@code MARKDOWN}, {@code HTML} or {@code XML}.
     * <p>
     * The name is recorded in
     * {@link org.apache.tika.metadata.TikaCoreProperties#TIKA_CONTENT_HANDLER_TYPE},
     * which lets downstream components (the inference pipeline, for instance)
     * know which format {@code tika:content} is in instead of having to guess.
     * <p>
     * Unless overridden, this method returns {@code "UNKNOWN"}.
     *
     * @return the handler type name; never {@code null}
     */
    default String handlerTypeName() {
        return "UNKNOWN";
    }
}