IPADetector.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.detect.zip;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;

import org.apache.tika.io.TikaInputStream;
import org.apache.tika.mime.MediaType;
/**
 * Zip container detector that reports the {@link #IPA} media type for archives
 * whose entry names cover every required IPA entry pattern.
 *
 * <p>Detection is purely name based: entry contents are never read. Both the
 * random-access path ({@link #detect}) and the streaming path
 * ({@link #streamingDetectUpdate} / {@link #streamingDetectFinal}) share the
 * same matching logic.</p>
 */
public class IPADetector implements ZipContainerDetector {

    static final MediaType IPA = MediaType.application("x-itunes-ipa");

    /**
     * To be considered as an IPA file, it needs to match all of these.
     * The set is unmodifiable; each detection run works on its own copy
     * (see {@link TmpPatterns}).
     */
    private static final Set<Pattern> IPA_ENTRY_PATTERNS = buildEntryPatterns();

    /**
     * Compiles the required entry-name patterns once, at class initialisation.
     *
     * @return an unmodifiable set holding one compiled pattern per required entry
     */
    private static Set<Pattern> buildEntryPatterns() {
        Set<Pattern> patterns = new HashSet<>();
        patterns.add(Pattern.compile("^Payload/$"));
        patterns.add(Pattern.compile("^Payload/.*\\.app/$"));
        patterns.add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/$"));
        patterns.add(Pattern.compile("^Payload/.*\\.app/_CodeSignature/CodeResources$"));
        patterns.add(Pattern.compile("^Payload/.*\\.app/Info\\.plist$"));
        patterns.add(Pattern.compile("^Payload/.*\\.app/PkgInfo$"));
        return Collections.unmodifiableSet(patterns);
    }

    /**
     * Drops every still-outstanding pattern that the given entry name matches.
     *
     * @param state     the per-run set of patterns not yet matched
     * @param entryName the name of the zip entry being examined
     * @return {@link #IPA} once no outstanding pattern remains, otherwise {@code null}
     */
    private static MediaType recordEntry(TmpPatterns state, String entryName) {
        state.patterns.removeIf(pattern -> pattern.matcher(entryName).matches());
        return state.patterns.isEmpty() ? IPA : null;
    }

    /**
     * Walks the entries of an opened zip file and checks their names against
     * the required IPA patterns, stopping as soon as all have been matched.
     *
     * @param zip the zip file whose entries are enumerated
     * @param tis the underlying stream; not used by this detector
     * @return {@link #IPA} if every required pattern was matched, otherwise {@code null}
     * @throws IOException declared by the interface contract
     */
    @Override
    public MediaType detect(ZipFile zip, TikaInputStream tis) throws IOException {
        // Note - consider generalising this logic, if another format needs many regexp matching
        TmpPatterns outstanding = new TmpPatterns();
        Enumeration<ZipArchiveEntry> entries = zip.getEntries();
        while (entries.hasMoreElements()) {
            MediaType detected = recordEntry(outstanding, entries.nextElement().getName());
            if (detected != null) {
                // We've found everything we need to find
                return detected;
            }
        }
        // If we get here, not all required entries were found
        return null;
    }

    /**
     * Feeds one streamed zip entry into the detection state kept in the
     * supplied context, creating that state on first use.
     *
     * @param zae           the entry currently being streamed; only its name is used
     * @param zis           the entry's content stream; not read by this detector
     * @param detectContext holder for the per-archive {@link TmpPatterns} state
     * @return {@link #IPA} once every required pattern has been matched, otherwise {@code null}
     */
    @Override
    public MediaType streamingDetectUpdate(ZipArchiveEntry zae, InputStream zis,
                                           StreamingDetectContext detectContext) {
        TmpPatterns outstanding = detectContext.get(TmpPatterns.class);
        if (outstanding == null) {
            outstanding = new TmpPatterns();
            detectContext.set(TmpPatterns.class, outstanding);
        }
        return recordEntry(outstanding, zae.getName());
    }

    /**
     * Gives the final streaming verdict based on the state accumulated by
     * {@link #streamingDetectUpdate}.
     *
     * @param detectContext holder for the per-archive {@link TmpPatterns} state
     * @return {@code null} if no state was recorded or some pattern is still
     *         unmatched (in the latter case the state is removed from the
     *         context); {@link #IPA} if every pattern was matched
     */
    @Override
    public MediaType streamingDetectFinal(StreamingDetectContext detectContext) {
        TmpPatterns outstanding = detectContext.get(TmpPatterns.class);
        if (outstanding == null) {
            return null;
        }
        if (outstanding.patterns.isEmpty()) {
            // We've found everything we need to find
            return IPA;
        }
        detectContext.remove(TmpPatterns.class);
        return null;
    }

    /**
     * Mutable per-run working copy of the required patterns; a pattern is
     * removed from it as soon as some entry name matches.
     */
    private static class TmpPatterns {
        // Copy constructor instead of clone(): no unchecked cast needed.
        final Set<Pattern> patterns = new HashSet<>(IPA_ENTRY_PATTERNS);
    }
}