ColumnEncoding.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata;
import java.util.Optional;
import java.util.SortedMap;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkState;
import static java.util.Objects.requireNonNull;
/**
 * Describes how a column is encoded: the encoding kind, the dictionary size, and,
 * optionally, the encodings of additional sequences keyed by sequence ID.
 */
public class ColumnEncoding
{
    /**
     * The kinds of encoding a {@link ColumnEncoding} can carry.
     */
    public enum ColumnEncodingKind
    {
        DIRECT,
        DICTIONARY,
        DIRECT_V2,
        DICTIONARY_V2,
        DWRF_DIRECT,
        DWRF_MAP_FLAT,
    }

    /**
     * Sequence ID for which {@link #getColumnEncoding(int)} returns this encoding itself.
     */
    public static final int DEFAULT_SEQUENCE_ID = 0;

    private final ColumnEncodingKind columnEncodingKind;
    private final int dictionarySize;

    // DWRF supports the concept of sequences.
    // A column can be modeled as multiple sequences that are independently encoded.
    // For example, for a flat map column, each key will have a
    // separate value stream with its own column encoding.
    //
    // These additional sequence IDs start from 1 and may not be consecutive. For example, the file may be
    // updated to remove keys from the flat map without changing the sequence IDs associated with each key.
    //
    // The map is sorted so that iterating over it returns the DwrfSequenceEncodings in ascending
    // sequence ID order.
    private final Optional<SortedMap<Integer, DwrfSequenceEncoding>> additionalSequenceEncodings;

    /**
     * Creates an encoding without any additional sequence encodings.
     *
     * @param columnEncodingKind the kind of encoding; must not be null
     * @param dictionarySize the dictionary size recorded for this encoding
     */
    public ColumnEncoding(ColumnEncodingKind columnEncodingKind, int dictionarySize)
    {
        this(columnEncodingKind, dictionarySize, Optional.empty());
    }

    /**
     * Creates an encoding with optional additional sequence encodings.
     *
     * @param columnEncodingKind the kind of encoding; must not be null
     * @param dictionarySize the dictionary size recorded for this encoding
     * @param additionalSequenceEncodings encodings of additional sequences keyed by sequence ID;
     * must not be null (use {@link Optional#empty()} when there are none)
     * @throws NullPointerException if {@code columnEncodingKind} or {@code additionalSequenceEncodings} is null
     */
    public ColumnEncoding(ColumnEncodingKind columnEncodingKind, int dictionarySize, Optional<SortedMap<Integer, DwrfSequenceEncoding>> additionalSequenceEncodings)
    {
        this.columnEncodingKind = requireNonNull(columnEncodingKind, "columnEncodingKind is null");
        this.dictionarySize = dictionarySize;
        // Fail fast here: a null Optional would otherwise only surface later as an NPE in getColumnEncoding()
        this.additionalSequenceEncodings = requireNonNull(additionalSequenceEncodings, "additionalSequenceEncodings is null");
    }

    /**
     * @return the kind of this encoding, never null
     */
    public ColumnEncodingKind getColumnEncodingKind()
    {
        return columnEncodingKind;
    }

    /**
     * @return the dictionary size passed at construction
     */
    public int getDictionarySize()
    {
        return dictionarySize;
    }

    /**
     * @return the additional sequence encodings keyed by sequence ID, or empty if none were supplied
     */
    public Optional<SortedMap<Integer, DwrfSequenceEncoding>> getAdditionalSequenceEncodings()
    {
        return additionalSequenceEncodings;
    }

    /**
     * Resolves the encoding to use for the given sequence.
     *
     * @param sequence the sequence ID; {@link #DEFAULT_SEQUENCE_ID} refers to this encoding itself
     * @return this instance for the default sequence, otherwise the value encoding registered
     * for {@code sequence} in the additional sequence encodings
     * @throws IllegalStateException if {@code sequence} is not the default sequence and either no
     * additional sequence encodings exist or none is registered for {@code sequence}
     */
    public ColumnEncoding getColumnEncoding(int sequence)
    {
        if (sequence == DEFAULT_SEQUENCE_ID) {
            return this;
        }

        checkState(
                additionalSequenceEncodings.isPresent(),
                "Got non-zero sequence: %s, but there are no additional sequence encodings: %s", sequence, this);

        SortedMap<Integer, DwrfSequenceEncoding> sequenceEncodings = additionalSequenceEncodings.get();
        DwrfSequenceEncoding sequenceEncoding = sequenceEncodings.get(sequence);

        checkState(
                sequenceEncoding != null,
                "Non-zero sequence %s is not present in the ColumnEncoding's additional sequences: %s",
                sequence,
                sequenceEncodings.keySet());

        return sequenceEncoding.getValueEncoding();
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("columnEncodingKind", columnEncodingKind)
                .add("dictionarySize", dictionarySize)
                .add("additionalSequenceEncodings", additionalSequenceEncodings)
                .toString();
    }
}