// ChmPmgiHeader.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.microsoft.chm;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Arrays;

import org.apache.tika.exception.TikaException;

/**
 * Header of a PMGI (directory index) chunk of a CHM file.
 *
 * <p>
 * Note: a PMGI chunk does not always exist. An index chunk has the following format:
 * <pre>
 * 0000: char[4] 'PMGI'
 * 0004: DWORD   Length of quickref/free area at end of directory chunk
 * 0008: Directory index entries (to quickref/free area)
 * </pre>
 * The quickref area in a PMGI is the same as in a PMGL. The format of a directory
 * index entry is as follows:
 * <pre>
 * BYTE:   length of name
 * BYTEs:  name (UTF-8 encoded)
 * ENCINT: directory listing chunk which starts with name
 * </pre>
 * Encoded Integers aka ENCINT: an ENCINT is a variable-length integer. The high bit of each
 * byte indicates "continued to the next byte". Bytes are stored most significant to
 * least significant. So, for example, $EA $15 is
 * {@code (((0xEA&0x7F)<<7)|0x15) = 0x3515}.
 *
 * <p>
 * Note: This class is not in use
 */
public class ChmPmgiHeader implements ChmAccessor<ChmPmgiHeader> {
    private static final long serialVersionUID = -2092282339894303701L;

    /** Chunk signature; pre-filled with the PMGI marker bytes by the constructor. */
    private byte[] signature;

    /** Length of the quickref/free area, read as an unsigned 32-bit value. */
    private long free_space; /* 4 */

    /* local usage */
    /** Number of bytes of the buffer being parsed that have not been consumed yet. */
    private int dataRemained;

    /** Read cursor (offset) into the buffer being parsed. */
    private int currentPlace = 0;

    /**
     * Creates a header whose signature is initialised to the PMGI marker.
     */
    public ChmPmgiHeader() {
        signature = ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8); /* 0 (PMGI) */
    }

    private int getDataRemained() {
        return dataRemained;
    }

    private void setDataRemained(int dataRemained) {
        this.dataRemained = dataRemained;
    }

    private int getCurrentPlace() {
        return currentPlace;
    }

    private void setCurrentPlace(int currentPlace) {
        this.currentPlace = currentPlace;
    }

    /**
     * Locates the PMGI marker in {@code data} and, if present, copies {@code count} bytes
     * starting at the marker into the signature array of {@code chmPmgiHeader}. The read
     * cursor of this instance is then advanced by {@code count} bytes.
     *
     * <p>
     * Note that the cursor is advanced from the start of the buffer, not from the position
     * at which the marker was found.
     *
     * @param data          buffer to read from
     * @param chmPmgiHeader header whose signature array receives the bytes
     * @param count         number of signature bytes to copy and to skip
     * @throws ChmParsingException declared because the {@code ChmAssert} checks are invoked
     */
    private void unmarshalCharArray(byte[] data, ChmPmgiHeader chmPmgiHeader, int count)
            throws ChmParsingException {
        ChmAssert.assertByteArrayNotNull(data);
        ChmAssert.assertChmAccessorNotNull(chmPmgiHeader);
        ChmAssert.assertPositiveInt(count);

        // Every parse starts from a clean cursor. The remaining-bytes counter is reset
        // here as well, so leaving the cursor at its old value would make a second parse
        // on the same instance read past the fields it is supposed to decode.
        this.setCurrentPlace(0);
        this.setDataRemained(data.length);

        int index = ChmCommons.indexOfDataSpaceStorageElement(data,
                ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8));
        if (index >= 0) {
            System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count);
        }
        // Otherwise: some chm documents (actually most of them) do not contain a
        // PMGI header; in this case the signature is deliberately left untouched.

        this.setCurrentPlace(this.getCurrentPlace() + count);
        this.setDataRemained(this.getDataRemained() - count);
    }

    /**
     * Reads four bytes at the current cursor as a little-endian unsigned 32-bit integer and
     * advances the cursor by four bytes.
     *
     * @param data buffer to read from
     * @param dest ignored on input; kept for signature compatibility
     * @return the decoded value, always in the range {@code 0 .. 0xFFFFFFFF}
     * @throws ChmParsingException if fewer than four unread bytes remain
     */
    private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException {
        ChmAssert.assertByteArrayNotNull(data);

        if (4 > getDataRemained()) {
            throw new ChmParsingException("4 > dataLenght");
        }
        final int offset = this.getCurrentPlace();
        // Mask with a long literal so the shifts happen in 64-bit arithmetic; with int
        // arithmetic a top byte >= 0x80 would set the sign bit and yield a negative value.
        dest = (data[offset] & 0xffL) |
                (data[offset + 1] & 0xffL) << 8 |
                (data[offset + 2] & 0xffL) << 16 |
                (data[offset + 3] & 0xffL) << 24;

        setDataRemained(this.getDataRemained() - 4);
        this.setCurrentPlace(offset + 4);
        return dest;
    }

    /**
     * Returns pmgi signature if exists
     *
     * @return signature (the internal array, not a copy)
     */
    public byte[] getSignature() {
        return signature;
    }

    /**
     * Sets pmgi signature
     *
     * @param signature the signature bytes to store (the array is not copied)
     */
    protected void setSignature(byte[] signature) {
        this.signature = signature;
    }

    /**
     * Returns pmgi free space
     *
     * @return free_space
     */
    public long getFreeSpace() {
        return free_space;
    }

    /**
     * Sets pmgi free space
     *
     * @param free_space length of the quickref/free area
     */
    protected void setFreeSpace(long free_space) {
        this.free_space = free_space;
    }

    /**
     * Returns textual representation of the pmgi header
     *
     * @return the signature and the free space, terminated by the platform line separator
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("signature:=").append(new String(getSignature(), UTF_8)).append(", ");
        sb.append("free space:=").append(getFreeSpace()).append(System.getProperty("line.separator"));
        return sb.toString();
    }

    /**
     * Parses the signature and the free-space field from {@code data} into
     * {@code chmPmgiHeader}, then verifies that the resulting signature equals the PMGI
     * marker.
     *
     * @param data          raw bytes; must hold at least {@code ChmConstants.CHM_PMGI_LEN} bytes
     * @param chmPmgiHeader header instance that receives the parsed values
     * @throws TikaException if {@code data} is too short or the signature is not the PMGI
     *                       marker; null arguments are rejected through {@code ChmAssert}
     *                       (presumably with a parsing exception rather than a
     *                       NullPointerException - confirm against ChmAssert)
     */
    // @Override
    public void parse(byte[] data, ChmPmgiHeader chmPmgiHeader) throws TikaException {
        // Validate up front so that null arguments are reported the same way the
        // unmarshal helpers report them, instead of failing on data.length below.
        ChmAssert.assertByteArrayNotNull(data);
        ChmAssert.assertChmAccessorNotNull(chmPmgiHeader);

        /* we only know how to deal with a 0x8 byte structures */
        if (data.length < ChmConstants.CHM_PMGI_LEN) {
            throw new TikaException("we only know how to deal with a 0x8 byte structures");
        }

        /* unmarshal fields */
        chmPmgiHeader.unmarshalCharArray(data, chmPmgiHeader, ChmConstants.CHM_SIGNATURE_LEN);
        chmPmgiHeader
                .setFreeSpace(chmPmgiHeader.unmarshalUInt32(data, chmPmgiHeader.getFreeSpace()));

        /* check structure */
        if (!Arrays.equals(chmPmgiHeader.getSignature(),
                ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8))) {
            throw new TikaException(
                    "it does not seem to be valid a PMGI signature, check ChmItsp index_root if " +
                            "it was -1, means no PMGI, use PMGL insted");
        }

    }
}