DefaultEncodingDetector.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.detect;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import javax.imageio.spi.ServiceRegistry;
import org.apache.tika.config.ServiceLoader;
/**
* A composite encoding detector based on all the {@link EncodingDetector}
* implementations available through the
* {@link ServiceRegistry service provider mechanism}.
*
* <p>Loaded detectors are sorted in two tiers:
* <ol>
* <li>Base detectors (non-{@link MetaEncodingDetector}) sorted by full
* class name (non-Tika before Tika, then ascending alphabetically).
* The package ordering guarantees:
* {@code org.apache.tika.ml.*} (Mojibuster) ���
* {@code org.apache.tika.parser.*} (HTML).</li>
* <li>{@link MetaEncodingDetector} instances always run last, after all
* base detectors have collected their candidates into
* {@link EncodingDetectorContext}.</li>
* </ol></p>
*
* <p>If you need to control the order of the Detectors explicitly, construct
* your own {@link CompositeEncodingDetector} and pass in the list in the
* required order.</p>
*
* <p>{@link MetaEncodingDetector} handling (collect-all-then-arbitrate)
* is provided by {@link CompositeEncodingDetector}.</p>
*
* @since Apache Tika 1.15
*/
public class DefaultEncodingDetector extends CompositeEncodingDetector {
public DefaultEncodingDetector() {
this(new ServiceLoader(DefaultEncodingDetector.class.getClassLoader()));
}
public DefaultEncodingDetector(ServiceLoader loader) {
super(sorted(loader.loadServiceProviders(EncodingDetector.class)));
}
public DefaultEncodingDetector(ServiceLoader loader,
Collection<Class<? extends EncodingDetector>>
excludeEncodingDetectors) {
super(sorted(loader.loadServiceProviders(EncodingDetector.class)),
excludeEncodingDetectors);
}
private static List<EncodingDetector> sorted(List<EncodingDetector> detectors) {
// Two-key sort: base detectors first (meta=0) then MetaEncodingDetectors (meta=1),
// within each tier sorted by full class name for stability across JARs.
detectors.sort(Comparator
.<EncodingDetector, Integer>comparing(
d -> (d instanceof MetaEncodingDetector) ? 1 : 0)
.thenComparing(d -> d.getClass().getName()));
return detectors;
}
}