HwpV5ParserTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.hwp;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.junit.jupiter.api.Test;
import org.apache.tika.MultiThreadedTikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.RecursiveParserWrapper;
public class HwpV5ParserTest extends MultiThreadedTikaTest {
@Test
public void testHwpV5Parser() throws Exception {
for (Parser parser : new Parser[]{new HwpV5Parser(), AUTO_DETECT_PARSER}) {
XMLResult result = getXML("testHWP-v5b.hwp", parser);
assertContains("<p>Apache Tika - \uCEE8\uD150\uCE20", result.xml);
Metadata metadata = result.metadata;
assertEquals("application/x-hwp-v5", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
assertEquals("SooMyung Lee", metadata.get(TikaCoreProperties.CREATOR));
assertContains("Apache Tika", result.xml);
}
}
@Test
public void testDistributedHwp() throws Exception {
XMLResult result = getXML("testHWP-v5-dist.hwp");
String content = result.xml;
assertContains("<p>Apache Tika - \uCEE8\uD150\uCE20", content);
assertEquals("application/x-hwp-v5", result.metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Apache Tika", result.metadata.get(TikaCoreProperties.TITLE));
assertEquals("SooMyung Lee", result.metadata.get(TikaCoreProperties.CREATOR));
}
@Test
public void testExisting() throws Exception {
XMLResult result = getXML("testHWP_5.0.hwp");
String content = result.xml;
Metadata metadata = result.metadata;
assertContains("\uD14C\uC2A4\uD2B8", content);
assertContains("test", content);
assertEquals("next1009", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("\uD14C\uC2A4\uD2B8", metadata.get(TikaCoreProperties.TITLE));
}
@Test
public void testMultiThreadedSkipFully() throws Exception {
//TIKA-3092
int numThreads = 2;
int numIterations = 50;
ParseContext[] parseContexts = new ParseContext[numThreads];
testMultiThreaded(new RecursiveParserWrapper(AUTO_DETECT_PARSER), parseContexts, numThreads,
numIterations, new RegexFileFilter(".*\\.hwp"));
}
}