MP4ParserTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.mp4;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.drew.metadata.mp4.Mp4Directory;
import com.drew.metadata.mp4.media.Mp4MetaDirectory;
import com.drew.metadata.mp4.media.Mp4SoundDirectory;
import com.drew.metadata.mp4.media.Mp4VideoDirectory;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.xml.sax.ContentHandler;
import org.apache.tika.TikaTest;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMP;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
/**
* Test case for parsing mp4 files.
*/
public class MP4ParserTest extends TikaTest {
Set<String> skipKeysA = new HashSet<>();
Set<String> skipKeysB = new HashSet<>();
/*
@Before
public void setUp() {
skipKeysB.add("X-TIKA:Parsed-By");
skipKeysA.add("X-TIKA:parse_time_millis");
skipKeysB.add("X-TIKA:content_handler");
skipKeysA.add("X-TIKA:content_handler");
skipKeysB.add("X-TIKA:parse_time_millis");
skipKeysB.add("xmpDM:videoCompressor");
//skipKeysB.add("xmpDM:audioChannelType");
//skipKeysB.add("xmpDM:audioChannelType");
skipKeysA.add("X-TIKA:content");
skipKeysB.add("X-TIKA:content");
skipKeysB.add("xmpDM:copyright");
}*/
/**
* Test that we can extract information from
* a M4A MP4 Audio file
*/
@Test
public void testMP4ParsingAudio() throws Exception {
Metadata metadata = new Metadata();
String content = getText("testMP4.m4a", metadata);
// Check core properties
assertEquals("audio/mp4", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("2012-01-28T18:39:18Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals("2012-01-28T18:40:25Z", metadata.get(TikaCoreProperties.MODIFIED));
// Check the textual contents
assertContains("Test Title", content);
assertContains("Test Artist", content);
assertContains("Test Album", content);
assertContains("2008", content);
assertContains("Test Comment", content);
assertContains("Test Genre", content);
// Check XMPDM-typed audio properties
assertEquals("Test Album", metadata.get(XMPDM.ALBUM));
assertEquals("Test Artist", metadata.get(XMPDM.ARTIST));
assertEquals("Test Composer", metadata.get(XMPDM.COMPOSER));
assertEquals("2008", metadata.get(XMPDM.RELEASE_DATE));
assertEquals("Test Genre", metadata.get(XMPDM.GENRE));
assertEquals("Test Comments", metadata.get(XMPDM.LOG_COMMENT.getName()));
assertEquals("1", metadata.get(XMPDM.TRACK_NUMBER));
assertEquals("Test Album Artist", metadata.get(XMPDM.ALBUM_ARTIST));
assertEquals("6", metadata.get(XMPDM.DISC_NUMBER));
assertEquals("0", metadata.get(XMPDM.COMPILATION));
assertEquals("44100", metadata.get(XMPDM.AUDIO_SAMPLE_RATE));
assertEquals("Stereo", metadata.get(XMPDM.AUDIO_CHANNEL_TYPE));
assertEquals("M4A", metadata.get(XMPDM.AUDIO_COMPRESSOR));
assertEquals("0.07", metadata.get(XMPDM.DURATION));
assertEquals("iTunes 10.5.3.3", metadata.get(XMP.CREATOR_TOOL));
assertContains("org.apache.tika.parser.mp4.MP4Parser",
Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY)));
// Check again by file, rather than stream
TikaInputStream tstream =
TikaInputStream.get(getResourceAsStream("/test-documents/testMP4.m4a"));
tstream.getFile();
ContentHandler handler = new BodyContentHandler();
try {
AUTO_DETECT_PARSER.parse(tstream, handler, metadata, new ParseContext());
} finally {
tstream.close();
}
//TODO: why don't we check the output here?
}
// TODO Test a MP4 Video file
// TODO Test an old QuickTime Video File
@Test
@Timeout(30000)
public void testInfiniteLoop() throws Exception {
XMLResult r = getXML("testMP4_truncated.m4a");
assertEquals("audio/mp4", r.metadata.get(Metadata.CONTENT_TYPE));
assertEquals("M4A", r.metadata.get(XMPDM.AUDIO_COMPRESSOR));
}
@Test
public void testAudioOnlyMP4() throws Exception {
final XMLResult xmlResult = getXML("testMP4AudioOnly.mp4");
final Metadata metadata = xmlResult.metadata;
assertEquals("audio/mp4", metadata.get(Metadata.CONTENT_TYPE));
}
@Test
public void testAudioOnlyCheck() {
assertTrue(MP4Parser.isAudioOnly(List.of(new Mp4SoundDirectory())));
}
@Test
public void testMetadataWithSoundConsideredAudio() {
assertTrue(MP4Parser.isAudioOnly(List.of(new Mp4SoundDirectory(), new Mp4MetaDirectory())));
}
@Test
public void testVideoDirectoriesNotConsideredAudio() {
final Collection<Mp4Directory> directories =
List.of(new Mp4VideoDirectory(), new Mp4VideoDirectory(), new Mp4SoundDirectory());
assertFalse(MP4Parser.isAudioOnly(directories));
}
@Test
public void testNoDirectoriesNotConsideredAudio() {
assertFalse(MP4Parser.isAudioOnly(Collections.emptyList()));
}
/*
@Test
public void compareMetadata() throws Exception {
Path dir = Paths.get("/data/mp4s");
processDir(dir);
}
private void processDir(Path dir) {
for (File f : dir.toFile().listFiles()) {
if (f.isDirectory()) {
processDir(f.toPath());
} else {
if (! f.getName().contains("MB3EOKALN337SEYQE6WXIGMY5VQ2ZU7M")) {
// continue;
}
System.out.println(f);
processFile(f.toPath());
System.out.println("");
}
}
}
private void processFile(Path p) {
Metadata a;
Metadata b;
try {
List<Metadata> metadataList = getRecursiveMetadata(p, new LegacyMP4Parser(), true);
if (metadataList.size() > 0) {
a = metadataList.get(0);
} else {
System.out.println("a is empty");
return;
}
} catch (AssertionError | Exception e) {
e.printStackTrace();
return;
}
try {
List<Metadata> metadataList = getRecursiveMetadata(p);
if (metadataList.size() > 0) {
b = metadataList.get(0);
} else {
System.out.println("b is empty");
return;
}
} catch (Exception e) {
e.printStackTrace();
return;
}
compare(p, a, b);
}
private void compare(Path p, Metadata a, Metadata b) {
/* System.out.println("A");
debug(a);
System.out.println("B");
debug(b);
Set<String> aKeys = getKeys(a, skipKeysA);
Set<String> bKeys = getKeys(b, skipKeysB);
for (String k : aKeys) {
if (! bKeys.contains(k)) {
System.out.println("not in b: " + k + " : " + a.get(k) + " : " +
p.getFileName().toString());
}
}
for (String k : bKeys) {
if (!aKeys.contains(k)) {
System.out.println("not in a: " + k + " : " + b.get(k) + " : " +
p.getFileName().toString());
}
}
for (String k : aKeys) {
if (! bKeys.contains(k)) {
continue;
}
Set<String> aVals = getVals(a, k);
Set<String> bVals = getVals(b, k);
for (String v : aVals) {
if (!bVals.contains(v)) {
System.out.println("b missing value: " + v + " for key " + k + " in " + p.getFileName().toString());
for (String bVal : bVals) {
System.out.println("\tb has " + bVal);
}
}
}
}
}
private Set<String> getKeys(Metadata m, Set<String> skipFields) {
Set<String> keys = new HashSet<>();
for (String n : m.names()) {
if (! skipFields.contains(n)) {
keys.add(n);
}
}
return keys;
}
private Set<String> getVals(Metadata m, String k) {
Set<String> vals = new HashSet<>();
for (String v : m.getValues(k)) {
vals.add(v);
}
return vals;
} */
}