FilenameUtilsTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.io;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import org.junit.jupiter.api.Test;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.utils.StringUtils;
public class FilenameUtilsTest {
/**
 * Different filesystems and operating systems have different restrictions
 * on the names that can be used for files and directories.
 * FilenameUtils.normalize() returns a cross-platform file name in which
 * special characters are replaced by a hex-based escape of the form
 * {@code %<code>}. For example, {@code why?.zip} is converted into
 * {@code why%3F.zip}.
 * <p>
 * Reserved chars are the ones in FilenameUtils.RESERVED_FILENAME_CHARACTERS.
 * <p>
 * This case checks that a name without any special characters comes back unchanged.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations">
 * Comparison of filename limitations</a>
 */
@Test
public void normalizeNothingTodo() throws Exception {
    final String plainName = "test.zip";
    final String normalized = FilenameUtils.normalize(plainName);
    assertEquals(plainName, normalized);
}
/**
 * Expects normalize(null) to be rejected with an IllegalArgumentException
 * whose message contains both "name" and "not be null".
 */
@Test
public void normalizeWithNull() throws Exception {
    try {
        FilenameUtils.normalize(null);
    } catch (IllegalArgumentException expected) {
        final String message = expected.getMessage();
        assertTrue(message != null && message.contains("name"));
        assertTrue(message != null && message.contains("not be null"));
        return;
    }
    // only reached when no exception was thrown at all
    fail("missing check for null parameters");
}
/**
 * Checks that a single reserved character ('?') is escaped as %3F wherever
 * it appears in the name: in the middle, at the start, at the end, or several times.
 */
@Test
public void normalizeWithReservedChar() throws Exception {
    // each row is {input, expected}
    final String[][] cases = {
        {"test?.txt", "test%3F.txt"},
        {"?test.txt", "%3Ftest.txt"},
        {"test.txt?", "test.txt%3F"},
        {"?test?txt?", "%3Ftest%3Ftxt%3F"},
    };
    for (String[] pair : cases) {
        assertEquals(pair[1], FilenameUtils.normalize(pair[0]));
    }
}
/**
 * Checks a name that mixes many special characters. In the expected value
 * '/' and '\' are left as they are, while the other special characters
 * appear as %XX escapes.
 */
@Test
public void normalizeWithReservedChars() throws Exception {
    final String raw = "?a/b\nc\td\re*f\\g:h<i>j.txt|";
    final String escaped = "%3Fa/b%0Ac%09d%0De%2Af\\g%3Ah%3Ci%3Ej.txt%7C";
    final String actual = FilenameUtils.normalize(raw);
    assertEquals(escaped, actual);
}
/**
 * Checks that every character in the range 0x00-0x1F is escaped as a
 * two-digit upper-case %XX sequence, while a '.' placed between 0x0F and
 * 0x10 is kept as is.
 */
@Test
public void normalizeWithNotPrintableChars() throws Exception {
    // input: chars 0..15, then '.', then chars 16..31
    final StringBuilder controlChars = new StringBuilder();
    for (char c = 0; c < 32; c++) {
        if (c == 16) {
            controlChars.append('.');
        }
        controlChars.append(c);
    }
    final String escaped = "%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F."
            + "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F";
    assertEquals(escaped, FilenameUtils.normalize(controlChars.toString()));
}
/**
 * Checks FilenameUtils.getName() against Windows-style, Unix-style and
 * mixed paths, including paths that end in a separator (empty name expected).
 */
@Test
public void testGetName() throws Exception {
    // each row is {expected name, path}
    final String[][] expectedAndPath = {
        {"quick.ppt", "C:\\the\\quick.ppt"},
        {"quick.ppt", "/the/quick.ppt"},
        {"", "/the/quick/"},
        {"", "~/the/quick////\\\\//"},
        {"~~quick", "~~quick"},
        {"quick.ppt", "quick.ppt"},
        {"", "////"},
        {"", "C:////"},
        {"", ".."},
        {"quick", "C:////../the/D:/quick"},
        {"file.ppt", "path:to:file.ppt"},
        {"HW.txt", "_1457338542/HW.txt"},
    };
    for (String[] row : expectedAndPath) {
        testFilenameEquality(row[0], row[1]);
    }
}
/**
 * Checks FilenameUtils.getSuffixFromPath(): a regular ".pdf" suffix is
 * returned, and the second input is expected to yield the empty string.
 */
@Test
public void testExtension() throws Exception {
    final String pdfSuffix = FilenameUtils.getSuffixFromPath("blah/blah/or/something.pdf");
    assertEquals(".pdf", pdfSuffix);

    final String noSuffix = FilenameUtils.getSuffixFromPath("blah \" blaoh .5\"");
    assertEquals(StringUtils.EMPTY, noSuffix);
}
/**
 * Asserts that FilenameUtils.getName(path) equals the expected name.
 *
 * @param expected the name that should be extracted
 * @param path the path handed to FilenameUtils.getName()
 */
private void testFilenameEquality(String expected, String path) {
    final String actual = FilenameUtils.getName(path);
    assertEquals(expected, actual);
}
/**
 * Exercises FilenameUtils.getSanitizedEmbeddedFileName() through the
 * sanitizeFilename helpers. Each assertion pairs a raw embedded resource
 * name with the sanitized file name that is expected for it, or with
 * {@code null} where no usable name is expected.
 */
@Test
public void testEmbeddedFileNames() throws Exception {
    final String docxMime =
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    String n = "the quick brown fox.docx";
    // the same name without its ".docx" extension
    String stem = n.substring(0, n.length() - 5);

    // clean name; extension expected from the content type; NUL expected to become a space;
    // ".bin" expected when neither the name nor the metadata provides an extension
    assertEquals(n, sanitizeFilename(n));
    assertEquals(n, sanitizeFilename(stem, docxMime));
    assertEquals(n, sanitizeFilename("the quick\u0000brown fox.docx"));
    assertEquals("the quick brown fox.bin", sanitizeFilename(stem));

    // names containing ".." and path separators: only the last segment is expected
    assertEquals("brown fox.docx", sanitizeFilename("the quick..\\brown fox.docx"));
    assertEquals("brown fox.docx", sanitizeFilename("the quick..\\/\\/\\brown fox.docx"));
    assertEquals("brown fox.docx", sanitizeFilename("the quick../brown fox.docx"));
    assertEquals("_brown fox.docx", sanitizeFilename("the quick../..brown fox.docx"));
    assertEquals("brown_ fox.docx", sanitizeFilename("the quick../brown.. fox.docx"));
    assertEquals("brown_. fox.docx", sanitizeFilename("the quick../brown... fox.docx"));
    assertEquals("brown_ fox.docx", sanitizeFilename("the quick../brown.... fox.docx"));

    // leading dots are expected to collapse into a single underscore
    assertEquals("_brown fox.docx", sanitizeFilename("...brown fox.docx"));
    assertEquals("_brown fox.docx", sanitizeFilename("....brown fox.docx"));
    assertEquals("_brown fox.docx", sanitizeFilename(".brown fox.docx"));

    // a name longer than the limit passed by the helper: the expected result is
    // 50 characters long, with "..." placed before the extension
    assertEquals("abcdefghijklmnopqrstuvwxyz_abcdefghijklmno....docx", sanitizeFilename(
            "abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz.docx"));

    // drive letters, root and home prefixes, and URL-like prefixes
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("C:\\the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("/the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("~/the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("https://the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("https://tika.apache.org/the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("file:///tika.apache.org/the quick brown fox.xlsx"));
    assertEquals("brown fox.xlsx", sanitizeFilename("a:/the quick:brown fox.xlsx"));
    assertEquals("_the quick brown fox.xlsx", sanitizeFilename("C:\\a/b/c/..the quick brown fox.xlsx"));
    assertEquals("_the quick brown fox.xlsx", sanitizeFilename("~/a/b/c/.the quick brown fox.xlsx"));

    // reserved characters inside the name are expected as %XX escapes;
    // surrounding double quotes are expected to be dropped
    assertEquals("the quick%3Ebrown fox.xlsx", sanitizeFilename("the quick>brown fox.xlsx"));
    assertEquals("the quick%22brown fox.xlsx", sanitizeFilename("the quick\"brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizeFilename("\"the quick brown fox.xlsx\""));

    // names that consist of dots followed by "docx"
    assertEquals("_.docx", sanitizeFilename("..................docx"));
    assertEquals("_.docx", sanitizeFilename("..docx"));
    assertNull(sanitizeFilename(".docx"));

    // inputs for which no file name is expected
    assertNull(sanitizeFilename(""));
    assertNull(sanitizeFilename(null));
    assertNull(sanitizeFilename("/"));
    assertNull(sanitizeFilename("~/"));
    assertNull(sanitizeFilename("C:"));
    assertNull(sanitizeFilename("C:/"));
    assertNull(sanitizeFilename("C:\\"));
}
/**
 * Exercises FilenameUtils.getSanitizedEmbeddedFilePath() through the
 * sanitizePath helpers. Each assertion pairs a raw embedded resource path
 * with the sanitized relative path that is expected for it, or with
 * {@code null} where no usable path is expected.
 */
@Test
public void testEmbeddedFilePaths() throws Exception {
    final String docxMime =
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    String n = "the quick brown fox.docx";
    // the same name without its ".docx" extension
    String stem = n.substring(0, n.length() - 5);

    assertEquals(n, sanitizePath(n));
    assertEquals(n, sanitizePath(stem, docxMime));
    // NOTE(review): this assertion used to call sanitizeFilename, so the NUL case never
    // reached the path API; confirm the expected value against getSanitizedEmbeddedFilePath
    assertEquals(n, sanitizePath("the quick\u0000brown fox.docx"));
    assertEquals("the quick brown fox.bin", sanitizePath(stem));

    // paths containing ".." and mixed separators: directories are expected to be kept,
    // joined with '/'
    assertEquals("the quick/brown fox.docx", sanitizePath("the quick..\\brown fox.docx"));
    assertEquals("the quick/brown fox.docx", sanitizePath("the quick..\\/\\/\\brown fox.docx"));
    assertEquals("the quick/brown fox.docx", sanitizePath("the quick../brown fox.docx"));
    assertEquals("the quick/_brown fox.docx", sanitizePath("the quick../..brown fox.docx"));
    assertEquals("the quick/brown. fox.docx", sanitizePath("the quick../brown.. fox.docx"));
    assertEquals("the quick/brown. fox.docx", sanitizePath("the quick../brown... fox.docx"));
    assertEquals("the quick/brown. fox.docx", sanitizePath("the quick../brown.... fox.docx"));

    // leading dots are expected to collapse into a single underscore
    assertEquals("_brown fox.docx", sanitizePath("...brown fox.docx"));
    assertEquals("_brown fox.docx", sanitizePath("....brown fox.docx"));
    assertEquals("_brown fox.docx", sanitizePath(".brown fox.docx"));

    // a path longer than the limit passed by the helper: the expected result is
    // 50 characters long, with "..." placed before the extension
    assertEquals("abcdefghijklmnopqrstuvwxyz_abcdefghijklmno....docx", sanitizePath(
            "abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz.docx"));

    // drive letters, root and home prefixes, and URL-like prefixes
    assertEquals("the quick brown fox.xlsx", sanitizePath("C:\\the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizePath("/the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizePath("~/the quick brown fox.xlsx"));
    assertEquals("the quick brown fox.xlsx", sanitizePath("https://the quick brown fox.xlsx"));
    assertEquals("tika.apache.org/the quick brown fox.xlsx", sanitizePath("https://tika.apache.org/the quick brown fox.xlsx"));
    assertEquals("tika.apache.org/the quick brown fox.xlsx", sanitizePath("file:///tika.apache.org/the quick brown fox.xlsx"));
    assertEquals("the quick/brown fox.xlsx", sanitizePath("a:/the quick:brown fox.xlsx"));
    assertEquals("a/b/c/_the quick brown fox.xlsx", sanitizePath("C:\\a/b/c/..the quick brown fox.xlsx"));
    assertEquals("a/b/c/_the quick brown fox.xlsx", sanitizePath("~/a/b/c/.the quick brown fox.xlsx"));

    // names that consist of dots followed by "docx"
    assertEquals(".docx", sanitizePath("..................docx"));
    assertEquals(".docx", sanitizePath("..docx"));
    assertEquals(".docx", sanitizePath(".docx"));

    // inputs for which no path is expected
    assertNull(sanitizePath(""));
    assertNull(sanitizePath(null));
    assertNull(sanitizePath("/"));
    assertNull(sanitizePath("~/"));
    assertNull(sanitizePath("C:"));
    assertNull(sanitizePath("C:/"));
    assertNull(sanitizePath("C:\\"));
}
/**
 * Runs FilenameUtils.getSanitizedEmbeddedFilePath() on metadata built from
 * the given name, with ".bin" and 50 as the remaining arguments.
 *
 * @param name raw embedded resource name/path; may be null
 * @return whatever getSanitizedEmbeddedFilePath() returns for that metadata
 */
private String sanitizePath(String name) {
    final Metadata source = getMetadata(name);
    return FilenameUtils.getSanitizedEmbeddedFilePath(source, ".bin", 50);
}
/**
 * Runs FilenameUtils.getSanitizedEmbeddedFilePath() on metadata built from
 * the given name and content type, with ".bin" and 50 as the remaining arguments.
 *
 * @param name raw embedded resource name/path
 * @param mimeType value stored as the metadata content type
 * @return whatever getSanitizedEmbeddedFilePath() returns for that metadata
 */
private String sanitizePath(String name, String mimeType) {
    final Metadata source = getMetadata(name, mimeType);
    return FilenameUtils.getSanitizedEmbeddedFilePath(source, ".bin", 50);
}
/**
 * Runs FilenameUtils.getSanitizedEmbeddedFileName() on metadata built from
 * the given name and content type, with ".bin" and 50 as the remaining arguments.
 *
 * @param name raw embedded resource name
 * @param mimeType value stored as the metadata content type
 * @return whatever getSanitizedEmbeddedFileName() returns for that metadata
 */
private String sanitizeFilename(String name, String mimeType) {
    final Metadata source = getMetadata(name, mimeType);
    return FilenameUtils.getSanitizedEmbeddedFileName(source, ".bin", 50);
}
/**
 * Runs FilenameUtils.getSanitizedEmbeddedFileName() on metadata built from
 * the given name, with ".bin" and 50 as the remaining arguments.
 *
 * @param name raw embedded resource name; may be null
 * @return whatever getSanitizedEmbeddedFileName() returns for that metadata
 */
private String sanitizeFilename(String name) {
    final Metadata source = getMetadata(name);
    return FilenameUtils.getSanitizedEmbeddedFileName(source, ".bin", 50);
}
/**
 * Builds the metadata produced by {@code getMetadata(name)} and additionally
 * sets its content type.
 *
 * @param name value used for the resource name and embedded resource path
 * @param contentType value stored under Metadata.CONTENT_TYPE
 * @return the populated metadata
 */
private Metadata getMetadata(String name, String contentType) {
    final Metadata typed = getMetadata(name);
    typed.set(Metadata.CONTENT_TYPE, contentType);
    return typed;
}
/**
 * Creates a fresh Metadata object in which the given name is stored both as
 * the resource name and as the embedded resource path.
 *
 * @param name value to store under both keys; may be null
 * @return the populated metadata
 */
private Metadata getMetadata(String name) {
    final Metadata fresh = new Metadata();
    fresh.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
    fresh.set(TikaCoreProperties.EMBEDDED_RESOURCE_PATH, name);
    return fresh;
}
}