ChunkingFactory.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.chunking;
import java.io.IOException;
import java.util.List;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.IntermediateNodeObject;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.LeafNodeObject;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ZipHeader;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.util.ByteUtil;
/**
 * Static factory that selects and instantiates the {@link AbstractChunking}
 * implementation to use for a given piece of file content.
 *
 * <p>The class only exposes static methods and cannot be instantiated.
 */
public class ChunkingFactory {

    /**
     * Prevents a default instance of the ChunkingFactory class from being created.
     */
    private ChunkingFactory() {
    }

    /**
     * Creates a chunking instance for raw file content.
     *
     * <p>Content for which {@code ZipHeader.isFileHeader(fileContent, 0)} is true gets a
     * {@link ZipFilesChunking}; everything else gets an {@link RDCAnalysisChunking}.
     *
     * @param fileContent The content of the file.
     * @return The instance of AbstractChunking.
     */
    public static AbstractChunking createChunkingInstance(byte[] fileContent) {
        if (ZipHeader.isFileHeader(fileContent, 0)) {
            return new ZipFilesChunking(fileContent);
        }
        return new RDCAnalysisChunking(fileContent);
    }

    /**
     * Creates a chunking instance for content that is already held by a root node object,
     * choosing the method whose output agrees with the node's existing children.
     *
     * <p>Selection order:
     * <ol>
     *   <li>{@link ZipFilesChunking} when the content starts with a zip file header;</li>
     *   <li>{@link SimpleChunking} when a trial run of it yields the same number of leaf
     *       nodes, with the same data sizes, as {@code nodeObject} already has;</li>
     *   <li>{@link RDCAnalysisChunking} otherwise.</li>
     * </ol>
     *
     * @param nodeObject Specify the root node object.
     * @return The instance of AbstractChunking.
     * @throws TikaException propagated from the underlying content-extraction or chunking calls.
     * @throws IOException   propagated from the underlying content-extraction or chunking calls.
     */
    public static AbstractChunking createChunkingInstance(IntermediateNodeObject nodeObject)
            throws TikaException, IOException {
        byte[] fileContent = ByteUtil.toByteArray(nodeObject.getContent());
        if (ZipHeader.isFileHeader(fileContent, 0)) {
            return new ZipFilesChunking(fileContent);
        }

        // Trial-run the simple algorithm and keep it only if it reproduces the layout the
        // node object already has; a mismatch means the content was chunked differently.
        AbstractChunking simpleChunking = new SimpleChunking(fileContent);
        List<LeafNodeObject> trialNodes = simpleChunking.chunking();
        if (hasSameLayout(nodeObject, trialNodes)) {
            return simpleChunking;
        }

        // The intermediate node count or a data size differs, so fall back to RDC chunking.
        return new RDCAnalysisChunking(fileContent);
    }

    /**
     * Creates a chunking instance for an explicitly requested chunking method.
     *
     * @param fileContent    The content of the file.
     * @param chunkingMethod The type of chunking methods.
     * @return The instance of AbstractChunking.
     * @throws InvalidOperationException if {@code chunkingMethod} is not one of the
     *                                   methods handled here.
     */
    public static AbstractChunking createChunkingInstance(byte[] fileContent,
                                                          ChunkingMethod chunkingMethod) {
        switch (chunkingMethod) {
            case RDCAnalysis:
                return new RDCAnalysisChunking(fileContent);
            case SimpleAlgorithm:
                return new SimpleChunking(fileContent);
            case ZipAlgorithm:
                return new ZipFilesChunking(fileContent);
            default:
                // The separating space was missing, which fused the value onto "type".
                throw new InvalidOperationException(
                        "Cannot support the chunking type " + chunkingMethod);
        }
    }

    /**
     * Tells whether the candidate leaf nodes match the node object's existing children
     * in count and, position by position, in data size.
     */
    private static boolean hasSameLayout(IntermediateNodeObject nodeObject,
                                         List<LeafNodeObject> candidates) {
        if (nodeObject.intermediateNodeObjectList.size() != candidates.size()) {
            return false;
        }
        for (int i = 0; i < candidates.size(); i++) {
            if (nodeObject.intermediateNodeObjectList.get(i).dataSize.dataSize !=
                    candidates.get(i).dataSize.dataSize) {
                return false;
            }
        }
        return true;
    }
}