// BaseRLEBitPackedDecoder.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.parquet.batchreader.decoders.rle;
import org.apache.parquet.column.values.bitpacking.BytePacker;
import org.apache.parquet.column.values.bitpacking.Packer;
import org.apache.parquet.io.ParquetDecodingException;
import org.openjdk.jol.info.ClassLayout;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import static com.facebook.presto.parquet.batchreader.decoders.rle.BaseRLEBitPackedDecoder.Mode.PACKED;
import static com.facebook.presto.parquet.batchreader.decoders.rle.BaseRLEBitPackedDecoder.Mode.RLE;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.slice.SizeOf.sizeOf;
import static java.lang.Math.ceil;
import static org.apache.parquet.bytes.BytesUtils.readIntLittleEndianPaddedOnBitWidth;
import static org.apache.parquet.bytes.BytesUtils.readUnsignedVarInt;
/**
 * Base class for decoders that read runs from an RLE / bit-packed hybrid stream.
 *
 * <p>Each successful call to {@link #decode()} loads one run and publishes it through the
 * protected fields: an {@link Mode#RLE} run is described by {@link #currentValue} and
 * {@link #currentCount}; a {@link Mode#PACKED} run is unpacked into {@link #currentBuffer},
 * with {@link #currentCount} holding the number of unpacked values.
 *
 * <p>A decoder is in "RLE only" mode when it is built with a bit width of 0 or through
 * {@link #BaseRLEBitPackedDecoder(int, int)}. In that mode the single run is set up by the
 * constructor and {@link #decode()} never reads from a stream.
 */
public abstract class BaseRLEBitPackedDecoder
{
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(BaseRLEBitPackedDecoder.class).instanceSize();

    /** Bit-packed runs are made of groups of this many values. */
    private static final int VALUES_PER_GROUP = 8;

    private final boolean rleOnlyMode;
    private final int bitWidth;
    private final BytePacker packer;
    private final InputStream inputStream;

    /** Kind of the run most recently loaded; {@code null} until a run has been set up. */
    protected Mode mode;

    /** Number of values in the current run, as set by the constructor or by {@link #decode()}. */
    protected int currentCount;

    /** Repeated value of the current run when {@link #mode} is {@link Mode#RLE}. */
    protected int currentValue;

    /** Unpacked values of the current run when {@link #mode} is {@link Mode#PACKED}. */
    protected int[] currentBuffer;

    /**
     * Creates a decoder over an encoded stream.
     *
     * <p>With a bit width of 0 no stream is read: the decoder is set up as a single RLE run of
     * {@code valueCount} zeros and {@code inputStream} is ignored.
     *
     * @param valueCount length of the single zero-valued run used when {@code bitWidth} is 0;
     *         not used otherwise
     * @param bitWidth number of bits per encoded value, between 0 and 32 inclusive
     * @param inputStream stream holding the encoded runs; not used when {@code bitWidth} is 0
     * @throws IllegalArgumentException if {@code bitWidth} is outside {@code [0, 32]}
     */
    public BaseRLEBitPackedDecoder(int valueCount, int bitWidth, InputStream inputStream)
    {
        checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
        this.bitWidth = bitWidth;
        if (bitWidth != 0) {
            this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
            this.inputStream = inputStream;
            this.rleOnlyMode = false;
        }
        else {
            this.rleOnlyMode = true;
            this.packer = null;
            this.inputStream = null;
            this.mode = RLE;
            this.currentValue = 0;
            this.currentCount = valueCount;
        }
    }

    /**
     * Creates an "RLE only" decoder that consists of one run and reads no stream.
     *
     * @param rleValue the value of the single run
     * @param rleValueCount the length of the single run
     */
    public BaseRLEBitPackedDecoder(int rleValue, int rleValueCount)
    {
        this.rleOnlyMode = true;
        this.bitWidth = 0;
        this.packer = null;
        this.inputStream = null;
        this.mode = RLE;
        this.currentValue = rleValue;
        this.currentCount = rleValueCount;
    }

    /**
     * Returns the shallow instance size of this class plus the size of {@link #currentBuffer}.
     *
     * @return the retained size in bytes
     */
    public long getRetainedSizeInBytes()
    {
        return INSTANCE_SIZE + sizeOf(currentBuffer);
    }

    /**
     * Loads the next run from the input stream into {@link #mode}, {@link #currentCount} and
     * either {@link #currentValue} (RLE) or {@link #currentBuffer} (bit-packed).
     *
     * @return {@code true} if a run was read; {@code false} if the decoder is in RLE-only mode
     *         (state is left untouched) or the stream reports no available bytes (in which case
     *         {@link #currentCount} is reset to 0)
     * @throws IOException if reading from the underlying stream fails
     * @throws ParquetDecodingException if a bit-packed run header describes more values or bytes
     *         than fit in an {@code int}-indexed array
     */
    protected boolean decode()
            throws IOException
    {
        if (rleOnlyMode) {
            // for RLE only mode there is nothing more to read
            return false;
        }

        // NOTE(review): available() is used as the end-of-data signal, which presumes an
        // in-memory stream whose available() is exact — confirm against callers.
        if (inputStream.available() <= 0) {
            currentCount = 0;
            return false;
        }

        // The lowest header bit selects the run kind; the remaining bits carry its length.
        int header = readUnsignedVarInt(inputStream);
        int runLength = header >>> 1;
        if ((header & 1) == 0) {
            mode = RLE;
            currentCount = runLength;
            currentValue = readIntLittleEndianPaddedOnBitWidth(inputStream, bitWidth);
        }
        else {
            mode = PACKED;
            readPackedRun(runLength);
        }
        return true;
    }

    /**
     * Reads a bit-packed run of {@code numGroups} groups and unpacks it into
     * {@link #currentBuffer}, setting {@link #currentCount} to the number of unpacked values.
     */
    private void readPackedRun(int numGroups)
            throws IOException
    {
        // All sizes are computed in 64-bit arithmetic: numGroups comes straight from the stream
        // and numGroups * 8 * bitWidth can exceed Integer.MAX_VALUE well before the byte count does.
        long valueCount = (long) numGroups * VALUES_PER_GROUP;
        long packedByteCount = (long) ceil((double) (valueCount * bitWidth) / 8.0D);
        if (valueCount > Integer.MAX_VALUE || packedByteCount > Integer.MAX_VALUE) {
            throw new ParquetDecodingException(
                    "bit-packed run is too large: " + numGroups + " groups with bit width " + bitWidth);
        }

        currentCount = (int) valueCount;
        currentBuffer = new int[currentCount];
        byte[] packed = new byte[(int) packedByteCount];

        // The stream may report fewer bytes than the header implies; read only what is
        // available and leave the rest of the buffer zeroed.
        int bytesToRead = Math.min(packed.length, inputStream.available());
        DataInputStream dataInputStream = new DataInputStream(inputStream);
        dataInputStream.readFully(packed, 0, bytesToRead);

        // Every group of 8 values occupies exactly bitWidth bytes.
        int byteIndex = 0;
        for (int valueIndex = 0; valueIndex < currentCount; valueIndex += VALUES_PER_GROUP) {
            packer.unpack8Values(packed, byteIndex, currentBuffer, valueIndex);
            byteIndex += bitWidth;
        }
    }

    /** Kind of run produced by {@link #decode()}. */
    public enum Mode
    {
        RLE,
        PACKED
    }
}