// NameDetectorTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.detect;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
/**
 * Test cases for the {@link NameDetector} class.
 *
 * <p>Each test runs against a detector built in {@link #setUp()} from three
 * name patterns: a case-insensitive "ends in .txt" pattern and a
 * case-sensitive "README" pattern, both mapped to
 * {@link MediaType#TEXT_PLAIN}, and a case-sensitive "ends in .hdr" pattern
 * mapped to {@code application/envi.hdr}.
 */
public class NameDetectorTest {

    /** Failure message reported when the detector unexpectedly throws. */
    private static final String UNEXPECTED_IO_EXCEPTION =
            "NameDetector should never throw an IOException";

    /** Detector under test; rebuilt before every test method. */
    private Detector detector;

    /**
     * Builds the {@link NameDetector} under test from a fixed pattern table.
     */
    @BeforeEach
    public void setUp() {
        Map<Pattern, MediaType> patterns = new HashMap<>();
        patterns.put(Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE), MediaType.TEXT_PLAIN);
        patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
        patterns.put(Pattern.compile(".*\\.hdr"), MediaType.application("envi.hdr"));
        detector = new NameDetector(patterns);
    }

    /**
     * Checks the detected type for plain names, names with surrounding
     * whitespace, URL-style names (query, fragment, percent-encoding),
     * names with leading paths, names containing {@code #}, and the
     * empty, {@code null} and missing resource name cases.
     */
    @Test
    public void testDetect() {
        assertDetect(MediaType.TEXT_PLAIN, "text.txt");
        assertDetect(MediaType.TEXT_PLAIN, "text.txt "); // trailing space
        assertDetect(MediaType.TEXT_PLAIN, "text.txt\n"); // trailing newline
        assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
        assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
        assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt"); // URL encoded
        assertDetect(MediaType.TEXT_PLAIN, "text.TXT"); // case insensitive
        assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");

        assertDetect(MediaType.TEXT_PLAIN, "README");
        assertDetect(MediaType.TEXT_PLAIN, " README "); // space around
        assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n"); // other whitespace
        assertDetect(MediaType.TEXT_PLAIN, "/a/README"); // leading path
        assertDetect(MediaType.TEXT_PLAIN, "\\b\\README"); // windows path
        assertDetect(MediaType.OCTET_STREAM, "ReadMe"); // case sensitive
        assertDetect(MediaType.OCTET_STREAM, "README.NOW");

        // TIKA-1928 # in the filename
        assertDetect(MediaType.TEXT_PLAIN, "text.txt"); // baseline without any #
        assertDetect(MediaType.TEXT_PLAIN, "text#.txt"); // # before extension
        assertDetect(MediaType.TEXT_PLAIN, "text#123.txt"); // # before extension
        assertDetect(MediaType.TEXT_PLAIN, "text.txt#pdf"); // # after extension

        // TIKA-3783 # before the final .
        assertDetect(MediaType.TEXT_PLAIN, "ABC#192.168.0.1#2.txt");

        // Check # as URL fragment too
        assertDetect(MediaType.TEXT_PLAIN, "http://foo/test.txt?1=2#pdf");
        assertDetect(MediaType.TEXT_PLAIN, "http://foo/test.txt#pdf");

        // tough one
        assertDetect(MediaType.TEXT_PLAIN, " See http://www.example.com:1234/README.txt?a=b#c \n");
        assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
        assertDetect(MediaType.OCTET_STREAM, "See README"); // but not this

        assertDetect(MediaType.application("envi.hdr"), "ang20150420t182050_corr_v1e_img.hdr");

        // test also the zero input cases
        assertDetect(MediaType.OCTET_STREAM, "");
        assertDetect(MediaType.OCTET_STREAM, null);
        // metadata that never had a resource name set at all
        assertEquals(MediaType.OCTET_STREAM, detect(new Metadata()));
    }

    /**
     * Asserts that the detector reports {@code type} for metadata whose
     * resource name is {@code name}.
     *
     * @param type the expected media type
     * @param name the value stored under
     *             {@link TikaCoreProperties#RESOURCE_NAME_KEY}; may be {@code null}
     */
    private void assertDetect(MediaType type, String name) {
        Metadata metadata = new Metadata();
        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
        assertEquals(type, detect(metadata));
    }

    /**
     * Runs the detector with no input stream and the given metadata.
     *
     * <p>An {@link IOException} is turned into a test failure that keeps the
     * exception as its cause, so the original stack trace stays visible in
     * the test report.
     *
     * @param metadata the metadata handed to the detector
     * @return the media type reported by the detector
     */
    private MediaType detect(Metadata metadata) {
        try {
            return detector.detect(null, metadata, new ParseContext());
        } catch (IOException e) {
            // fail(...) always throws; returning its result satisfies the compiler
            return fail(UNEXPECTED_IO_EXCEPTION, e);
        }
    }
}