// CharsetTest.java
package org.zeroturnaround.zip;
/**
* Copyright (C) 2012 ZeroTurnaround LLC <support@zeroturnaround.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import junit.framework.TestCase;
/**
 * Checks that zip entry names containing non-ASCII characters are decoded as expected when an
 * explicit {@link Charset} is handed to the {@code ZipUtil} / {@code ZipFileUtil} helpers.
 *
 * <p>Every test returns early on a Java 6 runtime (see {@link #ignoreTestIfJava6()}); presumably
 * the charset-aware zip APIs are not available there.
 */
public class CharsetTest extends TestCase {

  /** Name of the charset used for the archive whose entry names are UTF-8 encoded. */
  private static final String UTF8_NAME = "UTF8";

  /** Name of the charset used for the archive created on Windows. */
  private static final String IBM437_NAME = "IBM437";

  /**
   * Archive whose name and entry names use decomposed characters: a base letter followed by a
   * combining mark (U+0308 combining diaeresis, U+030C combining caron).
   */
  private static final File UMLAUTS_ZIP = new File("src/test/resources/umlauts-o\u0308a\u0308s\u030c.zip");

  /** Archive created on Windows 10; see the comment on {@link #testIterateExtractWithCharset()}. */
  private static final File WINDOWS_ZIP = new File("src/test/resources/windows-compressed.zip");

  // The expected names are spelled with unicode escapes in decomposed form (the form the original
  // author attributes to HFS) instead of literal accented characters, so the comparison does not
  // depend on the encoding of this source file. Background:
  // http://stackoverflow.com/questions/6153345/different-utf8-encoding-in-filenames-os-x/6153713#6153713
  private static final List<String> fileContents = new ArrayList<String>();

  static {
    // Directory entry: "umlauts-" + o/U+0308 + a/U+0308 + s/U+030C + "/"
    final String dir = "umlauts-o\u0308a\u0308s\u030c/";
    fileContents.add(dir);
    fileContents.add(dir + "Ro\u0308mer.txt"); // "Romer" with a diaeresis on the o
    fileContents.add(dir + "Raudja\u0308rv.txt"); // "Raudjarv" with a diaeresis on the second a
    fileContents.add(dir + "S\u030celajev.txt"); // "Selajev" with a caron on the S
  }

  /**
   * Tells the tests whether they should be skipped.
   *
   * @return {@code true} when the {@code java.version} system property starts with {@code "1.6"}
   */
  public boolean ignoreTestIfJava6() {
    return (System.getProperty("java.version").startsWith("1.6"));
  }

  /**
   * Builds a callback that fails the test for any entry whose name is not listed in
   * {@link #fileContents}.
   */
  private ZipEntryCallback expectKnownEntry() {
    return new ZipEntryCallback() {
      public void process(InputStream in, ZipEntry zipEntry) throws IOException {
        assertTrue(zipEntry.getName(), fileContents.contains(zipEntry.getName()));
      }
    };
  }

  /**
   * Best-effort recursive delete used to clean up temporary unpack directories.
   *
   * @param target file or directory to remove; failures to delete are ignored
   */
  private static void deleteRecursively(File target) {
    File[] children = target.listFiles(); // null when target is not a directory
    if (children != null) {
      for (int i = 0; i < children.length; i++) {
        deleteRecursively(children[i]);
      }
    }
    target.delete();
  }

  /** Iterating a stream with an explicit charset must yield only the expected entry names. */
  public void testIterateWithCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    FileInputStream fis = new FileInputStream(UMLAUTS_ZIP);
    try {
      ZipUtil.iterate(fis, expectKnownEntry(), Charset.forName(UTF8_NAME));
    }
    finally {
      // The test opened the stream, so the test releases it, even when an assertion fails.
      fis.close();
    }
  }

  /** Same as above, but restricting the iteration to an explicit list of entry names. */
  public void testIterateWithEntryNamesAndCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    String[] entryNames = fileContents.toArray(new String[fileContents.size()]);
    FileInputStream fis = new FileInputStream(UMLAUTS_ZIP);
    try {
      ZipUtil.iterate(fis, entryNames, expectKnownEntry(), Charset.forName(UTF8_NAME));
    }
    finally {
      fis.close();
    }
  }

  /** A {@link ZipFile} opened with an explicit charset must report only the expected names. */
  public void testZipFileGetEntriesWithCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    ZipFile zf = ZipFileUtil.getZipFile(UMLAUTS_ZIP, Charset.forName(UTF8_NAME));
    try {
      Enumeration<? extends ZipEntry> entries = zf.entries();
      while (entries.hasMoreElements()) {
        ZipEntry ze = entries.nextElement();
        assertTrue(ze.getName(), fileContents.contains(ze.getName()));
      }
    }
    finally {
      zf.close();
    }
  }

  /*
   * This test uses an archive created on Windows 10. The files in the archive have
   * umlauts in their names. The default encoding used for compression is IBM437 (the
   * original author found that out from [1]). Unpacking this archive with any other
   * encoding results in wrong file names (windows-1252) or in a zip exception during
   * getEntry() or when opening the file.
   *
   * [1] http://stackoverflow.com/questions/1510791/how-to-create-zip-files-with-specific-encoding
   */
  public void testIterateExtractWithCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    FileInputStream inputStream = new FileInputStream(WINDOWS_ZIP);
    try {
      ZipUtil.iterate(inputStream, new ZipEntryCallback() {
        public void process(InputStream in, ZipEntry zipEntry) throws IOException {
          // Expected names use the precomposed letters U+00E4 (a-umlaut) and U+00F6 (o-umlaut).
          if (zipEntry.getName().indexOf("raud") != -1) {
            assertEquals("windows-default-encoded/raudj\u00e4rv.txt", zipEntry.getName());
          }
          else {
            assertEquals("windows-default-encoded/r\u00f6mer.txt", zipEntry.getName());
          }
        }
      }, Charset.forName(IBM437_NAME));
    }
    finally {
      inputStream.close();
    }
  }

  /*
   * If a charset is not specified for the unpack then the test will just fail.
   */
  public void testExtractWithCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    File tmpDir = Files.createTempDirectory("zt-zip-tests").toFile();
    try {
      ZipUtil.unpack(WINDOWS_ZIP, tmpDir, Charset.forName(IBM437_NAME));
    }
    finally {
      // Do not leave unpacked files behind in the system temp directory.
      deleteRecursively(tmpDir);
    }
  }

  /** Unpacking a single entry by its (non-ASCII) name must return that entry's bytes. */
  public void testExtractEntryWithCharset() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    byte[] bytes = ZipUtil.unpackEntry(WINDOWS_ZIP, "windows-default-encoded/r\u00f6mer.txt", Charset.forName(IBM437_NAME));
    // Report a missing entry as a test failure rather than a NullPointerException.
    assertNotNull("entry not found in " + WINDOWS_ZIP, bytes);
    assertTrue(bytes.length > 0);
  }

  /*
   * If a charset is not specified for the unpack then the test will just fail.
   */
  public void testExtractWithCharsetUsingStream() throws Exception {
    if (ignoreTestIfJava6()) {
      return;
    }
    File tmpDir = Files.createTempDirectory("zt-zip-tests").toFile();
    try {
      FileInputStream inputStream = new FileInputStream(WINDOWS_ZIP);
      try {
        ZipUtil.unpack(inputStream, tmpDir, Charset.forName(IBM437_NAME));
      }
      finally {
        inputStream.close();
      }
    }
    finally {
      deleteRecursively(tmpDir);
    }
  }
}