// MSOneStoreParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.onenote.fsshttpb;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.CellManifestDataElementData;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.DataElement;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.DataElementPackage;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.ObjectGroupDataElementData;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestDataElementData;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestObjectGroupReferences;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionManifestRootDeclare;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.RevisionStoreObjectGroup;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexCellMapping;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexDataElementData;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageIndexRevisionMapping;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.StorageManifestDataElementData;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.CellID;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.DataElementType;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.ExGuid;
import org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.basic.HeaderCell;
/**
 * Builds an {@link MSOneStorePackage} out of the data elements carried by a
 * {@link DataElementPackage}.
 * <p>
 * The parser buckets the data elements by {@link DataElementType}, resolves the header cell and
 * the data root cell through the storage manifest and storage index, and then resolves every
 * remaining cell listed in the storage index.
 * <p>
 * Structural problems in the input (a missing data element, mapping or root declaration) are
 * reported as {@link IOException} with a descriptive message.
 * <p>
 * An instance keeps per-parse state in fields, so it must not be shared between threads. It may
 * be reused sequentially: {@link #parse(DataElementPackage)} resets that state on entry.
 */
public class MSOneStoreParser {

    /**
     * Root ExGuid looked up in a revision manifest's root declarations; when it is present the
     * object groups of that revision are created with the encryption flag set.
     */
    private static final ExGuid ENCRYPTION_KEY_ROOT_EXGUID =
            new ExGuid(3, UUID.fromString("4A3717F8-1C14-49E7-9526-81D942DE1741"));

    // Cell IDs that have already been resolved during the current parse
    private final Set<CellID> storageIndexHashTab = new HashSet<>();
    // The DataElements of Storage Index
    private List<DataElement> storageIndexDataElements;
    // The DataElements of Storage Manifest
    private List<DataElement> storageManifestDataElements;
    // The DataElements of Cell Manifest
    private List<DataElement> cellManifestDataElements;
    // The DataElements of Revision Manifest
    private List<DataElement> revisionManifestDataElements;
    // The DataElements of Object Group Data
    private List<DataElement> objectGroupDataElements;
    // The DataElements of Object BLOB
    private List<DataElement> objectBlOBElements;

    /**
     * Parses the given data element package.
     * <p>
     * If the storage index holds no cell mapping for the header cell, the returned package only
     * has its storage index and storage manifest populated.
     *
     * @param dataElementPackage the package whose data elements are parsed
     * @return the populated package
     * @throws IOException if the package lacks a storage index or storage manifest, if the
     *                     storage manifest does not declare the required roots, or if a
     *                     referenced data element or mapping cannot be resolved
     */
    public MSOneStorePackage parse(DataElementPackage dataElementPackage) throws IOException {
        // Drop cells remembered from an earlier parse so that a reused parser starts clean.
        storageIndexHashTab.clear();

        storageIndexDataElements =
                elementsOfType(dataElementPackage, DataElementType.StorageIndexDataElementData);
        storageManifestDataElements =
                elementsOfType(dataElementPackage, DataElementType.StorageManifestDataElementData);
        cellManifestDataElements =
                elementsOfType(dataElementPackage, DataElementType.CellManifestDataElementData);
        revisionManifestDataElements =
                elementsOfType(dataElementPackage, DataElementType.RevisionManifestDataElementData);
        objectGroupDataElements =
                elementsOfType(dataElementPackage, DataElementType.ObjectGroupDataElementData);
        objectBlOBElements =
                elementsOfType(dataElementPackage, DataElementType.ObjectDataBLOBDataElementData);

        if (storageIndexDataElements.isEmpty()) {
            throw new IOException("Data element package contains no storage index data element");
        }
        if (storageManifestDataElements.isEmpty()) {
            throw new IOException(
                    "Data element package contains no storage manifest data element");
        }

        MSOneStorePackage msOneStorePackage = new MSOneStorePackage();
        msOneStorePackage.storageIndex =
                (StorageIndexDataElementData) storageIndexDataElements.get(0).data;
        msOneStorePackage.storageManifest =
                (StorageManifestDataElementData) storageManifestDataElements.get(0).data;
        if (msOneStorePackage.storageIndex == null || msOneStorePackage.storageManifest == null) {
            throw new IOException("Storage index or storage manifest data element has no data");
        }

        int rootDeclareCount =
                msOneStorePackage.storageManifest.storageManifestRootDeclareList.size();
        if (rootDeclareCount < 1) {
            throw new IOException("Storage manifest declares no root for the header cell");
        }

        // Parse Header Cell
        CellID headerCellID =
                msOneStorePackage.storageManifest.storageManifestRootDeclareList.get(0).cellID;
        StorageIndexCellMapping headerCellStorageIndexCellMapping =
                msOneStorePackage.findStorageIndexCellMapping(headerCellID);
        storageIndexHashTab.add(headerCellID);
        if (headerCellStorageIndexCellMapping == null) {
            // No header cell mapping: nothing beyond the index and manifest is resolved.
            return msOneStorePackage;
        }

        msOneStorePackage.headerCellCellManifest =
                this.findCellManifest(headerCellStorageIndexCellMapping.cellMappingExGuid);
        msOneStorePackage.headerCellRevisionManifest =
                this.findCurrentRevisionManifest(msOneStorePackage.headerCellCellManifest,
                        msOneStorePackage);
        msOneStorePackage.headerCell =
                this.parseHeaderCell(msOneStorePackage.headerCellRevisionManifest);

        // Parse Data root
        if (rootDeclareCount < 2) {
            throw new IOException("Storage manifest declares no root for the data root cell");
        }
        CellID dataRootCellID =
                msOneStorePackage.storageManifest.storageManifestRootDeclareList.get(1).cellID;
        storageIndexHashTab.add(dataRootCellID);
        msOneStorePackage.dataRoot = this.parseObjectGroup(dataRootCellID, msOneStorePackage);

        // Parse other data
        for (StorageIndexCellMapping storageIndexCellMapping : msOneStorePackage.storageIndex
                .storageIndexCellMappingList) {
            // Set.add reports whether the cell was new, so each cell is resolved only once.
            if (storageIndexHashTab.add(storageIndexCellMapping.cellID)) {
                msOneStorePackage.OtherFileNodeList.addAll(
                        this.parseObjectGroup(storageIndexCellMapping.cellID,
                                msOneStorePackage));
            }
        }
        return msOneStorePackage;
    }

    /**
     * Collects the data elements of the package that have the given type.
     *
     * @param dataElementPackage the package to read the data elements from
     * @param type               the data element type to keep
     * @return the matching data elements, in package order
     */
    private static List<DataElement> elementsOfType(DataElementPackage dataElementPackage,
                                                    DataElementType type) {
        return dataElementPackage.dataElements.stream()
                .filter(d -> d.dataElementType == type)
                .collect(Collectors.toList());
    }

    /**
     * Returns the first data element with the given ExGuid, insisting that it carries data.
     *
     * @param candidates  the data elements to search
     * @param exGuid      the ExGuid to look for
     * @param description what is being looked up; used in the error message
     * @return the matching data element, never null and with non-null data
     * @throws IOException if no element matches or the first match has no data
     */
    private static DataElement requireDataElement(List<DataElement> candidates, ExGuid exGuid,
                                                  String description) throws IOException {
        for (DataElement candidate : candidates) {
            if (candidate.dataElementExGuid.equals(exGuid)) {
                if (candidate.data == null) {
                    throw new IOException(
                            "The " + description + " data element " + exGuid + " has no data");
                }
                return candidate;
            }
        }
        throw new IOException("Unable to find the " + description + " data element " + exGuid);
    }

    /**
     * Find the CellManifestDataElementData
     *
     * @param cellMappingExtendedGUID The ExGuid of Cell Mapping Extended GUID.
     * @return The CellManifestDataElementData instance.
     * @throws IOException if no cell manifest data element has that ExGuid
     */
    private CellManifestDataElementData findCellManifest(ExGuid cellMappingExtendedGUID)
            throws IOException {
        return (CellManifestDataElementData) requireDataElement(this.cellManifestDataElements,
                cellMappingExtendedGUID, "cell manifest").data;
    }

    /**
     * Find the Revision Manifest from Data Elements.
     *
     * @param revisionMappingExtendedGUID The Revision Mapping Extended GUID.
     * @return Returns the instance of RevisionManifestDataElementData
     * @throws IOException if no revision manifest data element has that ExGuid
     */
    private RevisionManifestDataElementData findRevisionManifestDataElement(
            ExGuid revisionMappingExtendedGUID) throws IOException {
        return (RevisionManifestDataElementData) requireDataElement(
                this.revisionManifestDataElements, revisionMappingExtendedGUID,
                "revision manifest").data;
    }

    /**
     * Resolves the revision manifest that the cell manifest names as its current revision.
     *
     * @param cellManifest      the cell manifest whose current revision is resolved
     * @param msOneStorePackage the package providing the storage index revision mappings
     * @return the revision manifest of the current revision
     * @throws IOException if the cell manifest has no current revision, if the storage index has
     *                     no mapping for it, or if the mapped revision manifest is missing
     */
    private RevisionManifestDataElementData findCurrentRevisionManifest(
            CellManifestDataElementData cellManifest, MSOneStorePackage msOneStorePackage)
            throws IOException {
        if (cellManifest.cellManifestCurrentRevision == null) {
            throw new IOException("Cell manifest does not declare a current revision");
        }
        StorageIndexRevisionMapping revisionMapping =
                msOneStorePackage.findStorageIndexRevisionMapping(
                        cellManifest.cellManifestCurrentRevision
                                .cellManifestCurrentRevisionExGuid);
        if (revisionMapping == null) {
            throw new IOException(
                    "Storage index has no revision mapping for the current revision of a cell");
        }
        return this.findRevisionManifestDataElement(revisionMapping.revisionMappingExGuid);
    }

    /**
     * Creates the header cell from the first object group referenced by the revision manifest.
     *
     * @param headerCellRevisionManifest the revision manifest of the header cell
     * @return the header cell instance
     * @throws IOException if the revision manifest references no object group, if the referenced
     *                     object group data element is missing, or if creating the header cell
     *                     fails
     */
    private HeaderCell parseHeaderCell(RevisionManifestDataElementData headerCellRevisionManifest)
            throws IOException {
        if (headerCellRevisionManifest.revisionManifestObjectGroupReferences.isEmpty()) {
            throw new IOException("Header cell revision manifest references no object group");
        }
        ExGuid rootObjectId =
                headerCellRevisionManifest.revisionManifestObjectGroupReferences.get(
                        0).objectGroupExtendedGUID;
        DataElement element = requireDataElement(this.objectGroupDataElements, rootObjectId,
                "header cell object group");
        return HeaderCell.createInstance((ObjectGroupDataElementData) element.data);
    }

    /**
     * Tells whether the revision manifest declares the encryption key root.
     *
     * @param revisionManifest the revision manifest to inspect
     * @return true if one of its root declarations uses the encryption key root ExGuid
     */
    private static boolean declaresEncryptionKeyRoot(
            RevisionManifestDataElementData revisionManifest) {
        for (RevisionManifestRootDeclare rootDeclare :
                revisionManifest.revisionManifestRootDeclareList) {
            if (rootDeclare.rootExGuid.equals(ENCRYPTION_KEY_ROOT_EXGUID)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves the object groups of the current revision of the given cell.
     * <p>
     * The cell manifest and revision manifest that get resolved on the way are also recorded in
     * the package's {@code cellManifests} and {@code revisionManifests} lists.
     *
     * @param objectGroupCellID the ID of the cell to resolve
     * @param msOneStorePackage the package that provides the mappings and receives the manifests
     * @return one object group per object group reference of the cell's revision manifest
     * @throws IOException if the cell mapping, cell manifest, revision mapping, revision manifest
     *                     or a referenced object group data element cannot be resolved, or if
     *                     creating an object group fails
     */
    private List<RevisionStoreObjectGroup> parseObjectGroup(CellID objectGroupCellID,
                                                            MSOneStorePackage msOneStorePackage)
            throws IOException {
        StorageIndexCellMapping storageIndexCellMapping =
                msOneStorePackage.findStorageIndexCellMapping(objectGroupCellID);
        if (storageIndexCellMapping == null) {
            throw new IOException("Storage index has no cell mapping for the requested cell");
        }

        CellManifestDataElementData cellManifest =
                this.findCellManifest(storageIndexCellMapping.cellMappingExGuid);
        msOneStorePackage.cellManifests.add(cellManifest);

        RevisionManifestDataElementData revisionManifest =
                this.findCurrentRevisionManifest(cellManifest, msOneStorePackage);
        msOneStorePackage.revisionManifests.add(revisionManifest);

        boolean isEncryption = declaresEncryptionKeyRoot(revisionManifest);

        List<RevisionStoreObjectGroup> objectGroups = new ArrayList<>();
        for (RevisionManifestObjectGroupReferences objRef :
                revisionManifest.revisionManifestObjectGroupReferences) {
            ObjectGroupDataElementData dataObject =
                    (ObjectGroupDataElementData) requireDataElement(this.objectGroupDataElements,
                            objRef.objectGroupExtendedGUID, "object group").data;
            objectGroups.add(
                    RevisionStoreObjectGroup.createInstance(objRef.objectGroupExtendedGUID,
                            dataObject, isEncryption));
        }
        return objectGroups;
    }
}