TailStream.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.io;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * <p>
 * A specialized input stream implementation which records the last portion read
 * from an underlying stream.
 * </p>
 * <p>
 * This stream implementation is useful to deal with information which is known
 * to be located at the end of a stream (e.g. ID3 v1 tags). While reading bytes
 * from the underlying stream, a given number of bytes is kept in an internal
 * ring buffer. This buffer can then be queried via {@link #getTail()} after the
 * whole stream was read. It contains the last bytes read from the original
 * input stream.
 * </p>
 * <p>
 * Every byte that passes through {@code read()}, {@code read(byte[])},
 * {@code read(byte[], int, int)} or {@code skip(long)} is recorded exactly once.
 * </p>
 */
public class TailStream extends FilterInputStream {
    /**
     * Upper bound for the size of the scratch buffer used by {@link #skip(long)}.
     */
    private static final int SKIP_SIZE = 4096;

    /**
     * The ring buffer in which the tail data is stored.
     */
    private final byte[] tailBuffer;

    /**
     * The size of the internal tail buffer.
     */
    private final int tailSize;

    /**
     * A copy of the internal tail buffer taken by {@link #mark(int)};
     * {@code null} as long as {@code mark()} has not been called.
     */
    private byte[] markBuffer;

    /**
     * The number of bytes that have been read so far.
     */
    private long bytesRead;

    /**
     * The value of {@link #bytesRead} at the last {@code mark()} operation.
     */
    private long markBytesRead;

    /**
     * The index in the tail buffer at which the next byte is written. Once the
     * buffer has been filled, this is also the position of the oldest byte.
     */
    private int currentIndex;

    /**
     * The value of {@link #currentIndex} at the last {@code mark()} operation.
     */
    private int markIndex;

    /**
     * Creates a new instance of {@code TailStream}.
     *
     * @param in   the underlying input stream
     * @param size the size of the tail buffer; a size of 0 disables recording
     * @throws NegativeArraySizeException if {@code size} is negative
     */
    public TailStream(InputStream in, int size) {
        super(in);
        tailSize = size;
        tailBuffer = new byte[size];
    }

    /**
     * {@inheritDoc} This implementation adds the read byte to the internal tail
     * buffer.
     */
    @Override
    public int read() throws IOException {
        int c = super.read();
        if (c != -1) {
            appendByte((byte) c);
        }
        return c;
    }

    /**
     * {@inheritDoc} This implementation delegates to
     * {@link #read(byte[], int, int)}, which records the data in the tail
     * buffer.
     */
    @Override
    public int read(byte[] buf) throws IOException {
        // FilterInputStream.read(byte[]) is itself implemented as
        // read(buf, 0, buf.length) and therefore ends up in the override below.
        // Appending here as well would record every chunk twice, so the data is
        // recorded only by the three-argument variant.
        return read(buf, 0, buf.length);
    }

    /**
     * {@inheritDoc} This implementation delegates to the underlying stream and
     * then adds the correct portion of the read buffer to the internal tail
     * buffer.
     */
    @Override
    public int read(byte[] buf, int ofs, int length) throws IOException {
        int read = super.read(buf, ofs, length);
        if (read > 0) {
            appendBuf(buf, ofs, read);
        }
        return read;
    }

    /**
     * Skips over up to {@code n} bytes by reading them through
     * {@link #read(byte[], int, int)}, so that the tail buffer is also filled
     * if data is skipped.
     *
     * @param n the number of bytes to be skipped
     * @return the number of bytes actually skipped; 0 if {@code n} is not
     *         positive; -1 if the end of the stream was reached before any
     *         byte could be skipped
     * @throws IOException if reading from the underlying stream fails
     */
    @Override
    public long skip(long n) throws IOException {
        if (n <= 0) {
            // Nothing to skip. This also prevents a negative array size below.
            return 0;
        }

        byte[] buf = new byte[(int) Math.min(n, SKIP_SIZE)];
        long bytesSkipped = 0;

        while (bytesSkipped < n) {
            int len = (int) Math.min(buf.length, n - bytesSkipped);
            int count = read(buf, 0, len);
            if (count == -1) {
                // End of stream: report -1 only if nothing was skipped at all.
                return bytesSkipped == 0 ? -1 : bytesSkipped;
            }
            bytesSkipped += count;
        }

        return bytesSkipped;
    }

    /**
     * Saves the state of the tail buffer (its content, the current index and
     * the number of bytes read) so that it can be restored when
     * {@code reset()} is called later.
     * <p>
     * This implementation only snapshots the tail bookkeeping; it does not
     * invoke {@code mark()} on the underlying stream.
     * </p>
     *
     * @param limit ignored by this implementation
     */
    @Override
    public void mark(int limit) {
        markBuffer = tailBuffer.clone();
        markIndex = currentIndex;
        markBytesRead = bytesRead;
    }

    /**
     * Restores the state of the tail buffer to the state when {@code mark()}
     * was called the last time. If {@code mark()} has not been called before,
     * this method has no effect.
     * <p>
     * This implementation only restores the tail bookkeeping; it does not
     * invoke {@code reset()} on the underlying stream.
     * </p>
     */
    @Override
    public void reset() {
        if (markBuffer != null) {
            System.arraycopy(markBuffer, 0, tailBuffer, 0, tailSize);
            currentIndex = markIndex;
            bytesRead = markBytesRead;
        }
    }

    /**
     * Returns an array with the last data read from the underlying stream. If
     * more bytes were read than the {@code size} constructor argument, the
     * returned array has a length of {@code size}. Otherwise, its length equals
     * the number of bytes read. The bytes are returned in the order in which
     * they were read.
     *
     * @return a newly allocated array with the last data read from the
     *         underlying stream
     */
    public byte[] getTail() {
        int size = (int) Math.min(tailSize, bytesRead);
        byte[] result = new byte[size];
        // Oldest bytes are stored from currentIndex up to the end of the data ...
        System.arraycopy(tailBuffer, currentIndex, result, 0, size - currentIndex);
        // ... followed by the newest bytes at the start of the ring buffer.
        System.arraycopy(tailBuffer, 0, result, size - currentIndex, currentIndex);
        return result;
    }

    /**
     * Adds the given byte to the internal tail buffer.
     *
     * @param b the byte to be added
     */
    private void appendByte(byte b) {
        // A zero-length tail buffer cannot store anything; only count the byte.
        if (tailSize > 0) {
            tailBuffer[currentIndex++] = b;
            if (currentIndex >= tailSize) {
                currentIndex = 0;
            }
        }
        bytesRead++;
    }

    /**
     * Adds the content of the given buffer to the internal tail buffer.
     *
     * @param buf    the buffer
     * @param ofs    the start offset in the buffer
     * @param length the number of bytes to be copied
     */
    private void appendBuf(byte[] buf, int ofs, int length) {
        if (length >= tailSize) {
            replaceTailBuffer(buf, ofs, length);
        } else {
            copyToTailBuffer(buf, ofs, length);
        }

        bytesRead += length;
    }

    /**
     * Replaces the content of the internal tail buffer by the last portion of
     * the given buffer. This method is called if a chunk was read from the
     * underlying stream which is at least as large as the tail buffer.
     *
     * @param buf    the buffer
     * @param ofs    the start offset in the buffer
     * @param length the number of bytes available in the buffer
     */
    private void replaceTailBuffer(byte[] buf, int ofs, int length) {
        System.arraycopy(buf, ofs + length - tailSize, tailBuffer, 0, tailSize);
        currentIndex = 0;
    }

    /**
     * Copies the given buffer into the internal tail buffer at the current
     * position, wrapping around to the start of the tail buffer if necessary.
     * This method is called if a chunk is read from the underlying stream which
     * is smaller than the tail buffer. In this case the tail buffer is only
     * partly overwritten.
     *
     * @param buf    the buffer
     * @param ofs    the start offset in the buffer
     * @param length the number of bytes to be copied
     */
    private void copyToTailBuffer(byte[] buf, int ofs, int length) {
        int remaining = tailSize - currentIndex;
        int size1 = Math.min(remaining, length);
        // First part: up to the end of the ring buffer.
        System.arraycopy(buf, ofs, tailBuffer, currentIndex, size1);
        // Second part (possibly empty): wrapped around to the start.
        System.arraycopy(buf, ofs + size1, tailBuffer, 0, length - size1);
        currentIndex = (currentIndex + length) % tailSize;
    }
}