// PageFilePageSource.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.pagefile;

import com.facebook.presto.common.Page;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockEncodingSerde;
import com.facebook.presto.hive.HiveColumnHandle;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.PrestoException;
import org.apache.hadoop.fs.FSDataInputStream;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import static com.facebook.presto.hive.pagefile.PageFileWriterFactory.createPagesSerdeForPageFile;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

/**
 * {@link ConnectorPageSource} that serves the pages of one split of a PageFile.
 *
 * <p>The constructor reads the file footer through {@link PageFileFooterReader}, selects the
 * contiguous run of stripes whose start offsets fall inside the split, and creates a
 * {@link PageFilePageReader} over that byte range. Every page returned by
 * {@link #getNextPage()} is re-projected so that its channels follow the order of the
 * requested columns.
 */
public class PageFilePageSource
        implements ConnectorPageSource
{
    private final FSDataInputStream inputStream;
    private final Iterator<Page> pageReader;
    // hiveColumnIndexes[i] is the channel of the stored page that feeds output channel i.
    private final int[] hiveColumnIndexes;

    private boolean closed;
    private long completedPositions;
    private long completedBytes;
    private long readTimeNanos;
    // Size of the largest page returned so far; reported as the system memory usage.
    private long memoryUsageBytes;

    /**
     * Creates a page source for the split {@code [start, start + splitLength)} of a PageFile.
     *
     * <p>This constructor does not close {@code inputStream} when it throws.
     *
     * @param inputStream stream over the PageFile; closed by {@link #close()}
     * @param start byte offset at which the split begins
     * @param splitLength length of the split in bytes
     * @param fileSize size of the file, handed to {@link PageFileFooterReader}
     * @param blockEncodingSerde serde used to build the pages serde for the file
     * @param columns columns to project, in output channel order
     * @throws IOException declared for failures while reading the footer or creating the reader
     * @throws NullPointerException if {@code inputStream} or {@code columns} is null
     */
    public PageFilePageSource(
            FSDataInputStream inputStream,
            long start,
            long splitLength,
            long fileSize,
            BlockEncodingSerde blockEncodingSerde,
            List<HiveColumnHandle> columns)
            throws IOException
    {
        // Validate arguments before any I/O is issued so that bad input fails fast.
        this.inputStream = requireNonNull(inputStream, "inputStream is null");
        requireNonNull(columns, "columns is null");

        int size = columns.size();
        this.hiveColumnIndexes = new int[size];
        for (int columnIndex = 0; columnIndex < size; columnIndex++) {
            HiveColumnHandle column = columns.get(columnIndex);
            hiveColumnIndexes[columnIndex] = column.getHiveColumnIndex();
        }

        PageFileFooterReader pageFileFooterReader = new PageFileFooterReader(inputStream, fileSize);

        // The footer offset doubles as the end of the last stripe.
        OffsetAndLength readStartAndLength = getReadStartAndLength(
                start,
                splitLength,
                pageFileFooterReader.getFooterOffset(),
                pageFileFooterReader.getStripeOffsets());

        pageReader = new PageFilePageReader(
                readStartAndLength.getOffset(),
                readStartAndLength.getLength(),
                inputStream,
                createPagesSerdeForPageFile(
                        blockEncodingSerde,
                        pageFileFooterReader.getCompression()));
    }

    /**
     * Returns the sum of {@code Page.getSizeInBytes()} over all pages returned so far.
     */
    @Override
    public long getCompletedBytes()
    {
        return completedBytes;
    }

    /**
     * Returns the total number of positions in all pages returned so far.
     */
    @Override
    public long getCompletedPositions()
    {
        return completedPositions;
    }

    /**
     * Returns the accumulated time, in nanoseconds, spent fetching and projecting pages in
     * {@link #getNextPage()}.
     */
    @Override
    public long getReadTimeNanos()
    {
        return readTimeNanos;
    }

    /**
     * Returns {@code true} once this source has been closed or the underlying reader has no
     * more pages.
     */
    @Override
    public boolean isFinished()
    {
        return closed || !pageReader.hasNext();
    }

    /**
     * Reads the next page and projects it onto the requested columns.
     *
     * @return the projected page, or {@code null} when the source is finished
     * @throws PrestoException with {@code NOT_SUPPORTED} if a requested column index is not
     *         present in the stored page
     */
    @Override
    public Page getNextPage()
    {
        if (isFinished()) {
            return null;
        }
        long start = System.nanoTime();

        Page page = pageReader.next();

        Block[] blocks = new Block[hiveColumnIndexes.length];
        for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
            int channel = hiveColumnIndexes[fieldId];
            // A column index past the stored channel count means the requested schema has
            // columns the file does not contain.
            if (channel >= page.getChannelCount()) {
                throw new PrestoException(
                        NOT_SUPPORTED,
                        "schema evolution is not supported for PageFile format");
            }
            blocks[fieldId] = page.getBlock(channel);
        }

        readTimeNanos += System.nanoTime() - start;
        completedPositions += page.getPositionCount();
        // Stats are based on the full stored page, not on the projected subset of blocks.
        long pageSizeInBytes = page.getSizeInBytes();
        completedBytes += pageSizeInBytes;
        memoryUsageBytes = Math.max(memoryUsageBytes, pageSizeInBytes);
        return new Page(page.getPositionCount(), blocks);
    }

    /**
     * Returns the size in bytes of the largest page returned so far.
     */
    @Override
    public long getSystemMemoryUsage()
    {
        return memoryUsageBytes;
    }

    /**
     * Closes the underlying input stream. Calling this method more than once has no further
     * effect.
     *
     * @throws IOException if closing the input stream fails; the source is still marked closed
     */
    @Override
    public void close()
            throws IOException
    {
        if (closed) {
            return;
        }
        // Mark the source closed before touching the stream: a failing close() must still
        // leave the source finished, and a retry must not close the stream a second time.
        closed = true;
        inputStream.close();
    }

    /**
     * Computes the byte range covering the first contiguous run of stripes whose start offsets
     * lie inside the split.
     *
     * @param splitStart byte offset at which the split begins
     * @param splitLength length of the split in bytes
     * @param lastStripeEnd offset at which the last stripe ends
     * @param stripeOffsets start offset of every stripe, in file order
     * @return the offset and length to read; {@code (0, 0)} when there are no stripes or no
     *         stripe starts inside the split
     * @throws IllegalArgumentException if {@code stripeOffsets} is null
     */
    private static OffsetAndLength getReadStartAndLength(
            long splitStart,
            long splitLength,
            long lastStripeEnd,
            List<Long> stripeOffsets)
    {
        checkArgument(stripeOffsets != null, "stripeOffsets is null, failed to read page file footer.");
        if (stripeOffsets.isEmpty()) {
            return new OffsetAndLength(0, 0);
        }

        long readStart = 0;
        long readEnd = 0;
        boolean stripeFound = false;
        int stripeCount = stripeOffsets.size();
        for (int i = 0; i < stripeCount; ++i) {
            long stripeOffset = stripeOffsets.get(i);
            if (splitContainsStripe(splitStart, splitLength, stripeOffset)) {
                if (!stripeFound) {
                    readStart = stripeOffset;
                    stripeFound = true;
                }
                // A stripe ends where the next one starts; the last stripe ends at lastStripeEnd.
                readEnd = (i == stripeCount - 1) ? lastStripeEnd : stripeOffsets.get(i + 1);
            }
            else if (stripeFound) {
                // The matching run has ended; later stripes are not part of this read.
                break;
            }
        }

        return new OffsetAndLength(readStart, readEnd - readStart);
    }

    /**
     * Returns whether {@code stripeOffset} lies in {@code [splitStart, splitStart + splitLength)}.
     *
     * <p>The upper bound is checked by subtraction so that a very large {@code splitLength}
     * cannot overflow {@code splitStart + splitLength}. This form is overflow-free for
     * non-negative offsets.
     */
    private static boolean splitContainsStripe(long splitStart, long splitLength, long stripeOffset)
    {
        return splitStart <= stripeOffset && stripeOffset - splitStart < splitLength;
    }

    /**
     * Immutable pair of a byte offset and a byte length.
     */
    private static final class OffsetAndLength
    {
        private final long offset;
        private final long length;

        OffsetAndLength(long offset, long length)
        {
            this.offset = offset;
            this.length = length;
        }

        private long getOffset()
        {
            return offset;
        }

        private long getLength()
        {
            return length;
        }
    }
}