OrcFileWriterConfig.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.airlift.configuration.Config;
import com.facebook.airlift.configuration.ConfigDescription;
import com.facebook.presto.orc.DefaultOrcWriterFlushPolicy;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.metadata.DwrfStripeCacheMode;
import com.facebook.presto.orc.writer.StreamLayoutFactory;
import io.airlift.units.DataSize;
import javax.validation.constraints.NotNull;
import java.util.OptionalInt;
import static com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType.BY_COLUMN_SIZE;
@SuppressWarnings("unused")
public class OrcFileWriterConfig
{
public enum StreamLayoutType
{
BY_STREAM_SIZE,
BY_COLUMN_SIZE,
}
public static final int DEFAULT_COMPRESSION_LEVEL = Integer.MIN_VALUE;
private static final boolean DEFAULT_FLAT_MAP_WRITER_ENABLED = false;
private DataSize stripeMinSize = DefaultOrcWriterFlushPolicy.DEFAULT_STRIPE_MIN_SIZE;
private DataSize stripeMaxSize = DefaultOrcWriterFlushPolicy.DEFAULT_STRIPE_MAX_SIZE;
private int stripeMaxRowCount = DefaultOrcWriterFlushPolicy.DEFAULT_STRIPE_MAX_ROW_COUNT;
private int rowGroupMaxRowCount = OrcWriterOptions.DEFAULT_ROW_GROUP_MAX_ROW_COUNT;
private DataSize dictionaryMaxMemory = OrcWriterOptions.DEFAULT_DICTIONARY_MAX_MEMORY;
private DataSize stringStatisticsLimit = OrcWriterOptions.DEFAULT_MAX_STRING_STATISTICS_LIMIT;
private DataSize maxCompressionBufferSize = OrcWriterOptions.DEFAULT_MAX_COMPRESSION_BUFFER_SIZE;
private StreamLayoutType streamLayoutType = BY_COLUMN_SIZE;
private boolean isDwrfStripeCacheEnabled = true;
private DataSize dwrfStripeCacheMaxSize = OrcWriterOptions.DEFAULT_DWRF_STRIPE_CACHE_MAX_SIZE;
private DwrfStripeCacheMode dwrfStripeCacheMode = OrcWriterOptions.DEFAULT_DWRF_STRIPE_CACHE_MODE;
private int compressionLevel = DEFAULT_COMPRESSION_LEVEL;
private boolean isIntegerDictionaryEncodingEnabled = OrcWriterOptions.DEFAULT_INTEGER_DICTIONARY_ENCODING_ENABLED;
private boolean isStringDictionaryEncodingEnabled = OrcWriterOptions.DEFAULT_STRING_DICTIONARY_ENCODING_ENABLED;
private boolean isStringDictionarySortingEnabled = OrcWriterOptions.DEFAULT_STRING_DICTIONARY_SORTING_ENABLED;
private boolean isFlatMapWriterEnabled = DEFAULT_FLAT_MAP_WRITER_ENABLED;
private boolean addHostnameToFileMetadataEnabled = true;
public OrcWriterOptions.Builder toOrcWriterOptionsBuilder()
{
DefaultOrcWriterFlushPolicy flushPolicy = DefaultOrcWriterFlushPolicy.builder()
.withStripeMinSize(stripeMinSize)
.withStripeMaxSize(stripeMaxSize)
.withStripeMaxRowCount(stripeMaxRowCount)
.build();
OptionalInt resolvedCompressionLevel = OptionalInt.empty();
if (compressionLevel != DEFAULT_COMPRESSION_LEVEL) {
resolvedCompressionLevel = OptionalInt.of(compressionLevel);
}
// Give separate copy to callers for isolation.
return OrcWriterOptions.builder()
.withFlushPolicy(flushPolicy)
.withRowGroupMaxRowCount(rowGroupMaxRowCount)
.withDictionaryMaxMemory(dictionaryMaxMemory)
.withMaxStringStatisticsLimit(stringStatisticsLimit)
.withMaxCompressionBufferSize(maxCompressionBufferSize)
.withStreamLayoutFactory(getStreamLayoutFactory(streamLayoutType))
.withDwrfStripeCacheEnabled(isDwrfStripeCacheEnabled)
.withDwrfStripeCacheMaxSize(dwrfStripeCacheMaxSize)
.withDwrfStripeCacheMode(dwrfStripeCacheMode)
.withCompressionLevel(resolvedCompressionLevel);
}
@NotNull
public DataSize getStripeMinSize()
{
return stripeMinSize;
}
@Config("hive.orc.writer.stripe-min-size")
public OrcFileWriterConfig setStripeMinSize(DataSize stripeMinSize)
{
this.stripeMinSize = stripeMinSize;
return this;
}
@NotNull
public DataSize getStripeMaxSize()
{
return this.stripeMaxSize;
}
@Config("hive.orc.writer.stripe-max-size")
public OrcFileWriterConfig setStripeMaxSize(DataSize stripeMaxSize)
{
this.stripeMaxSize = stripeMaxSize;
return this;
}
public int getStripeMaxRowCount()
{
return stripeMaxRowCount;
}
@Config("hive.orc.writer.stripe-max-rows")
public OrcFileWriterConfig setStripeMaxRowCount(int stripeMaxRowCount)
{
this.stripeMaxRowCount = stripeMaxRowCount;
return this;
}
public int getRowGroupMaxRowCount()
{
return rowGroupMaxRowCount;
}
@Config("hive.orc.writer.row-group-max-rows")
public OrcFileWriterConfig setRowGroupMaxRowCount(int rowGroupMaxRowCount)
{
this.rowGroupMaxRowCount = rowGroupMaxRowCount;
return this;
}
@NotNull
public DataSize getDictionaryMaxMemory()
{
return dictionaryMaxMemory;
}
@Config("hive.orc.writer.dictionary-max-memory")
public OrcFileWriterConfig setDictionaryMaxMemory(DataSize dictionaryMaxMemory)
{
this.dictionaryMaxMemory = dictionaryMaxMemory;
return this;
}
public boolean isIntegerDictionaryEncodingEnabled()
{
return isIntegerDictionaryEncodingEnabled;
}
@Config("hive.orc.writer.integer-dictionary-encoding-enabled")
public OrcFileWriterConfig setIntegerDictionaryEncodingEnabled(boolean isIntegerDictionaryEncodingEnabled)
{
this.isIntegerDictionaryEncodingEnabled = isIntegerDictionaryEncodingEnabled;
return this;
}
public boolean isStringDictionaryEncodingEnabled()
{
return isStringDictionaryEncodingEnabled;
}
@Config("hive.orc.writer.string-dictionary-encoding-enabled")
public OrcFileWriterConfig setStringDictionaryEncodingEnabled(boolean isStringDictionaryEncodingEnabled)
{
this.isStringDictionaryEncodingEnabled = isStringDictionaryEncodingEnabled;
return this;
}
public boolean isStringDictionarySortingEnabled()
{
return isStringDictionarySortingEnabled;
}
@Config("hive.orc.writer.string-dictionary-sorting-enabled")
public OrcFileWriterConfig setStringDictionarySortingEnabled(boolean isStringDictionarySortingEnabled)
{
this.isStringDictionarySortingEnabled = isStringDictionarySortingEnabled;
return this;
}
public boolean isFlatMapWriterEnabled()
{
return isFlatMapWriterEnabled;
}
@Config("hive.orc.writer.flat-map-writer-enabled")
public OrcFileWriterConfig setFlatMapWriterEnabled(boolean isFlatMapWriterEnabled)
{
this.isFlatMapWriterEnabled = isFlatMapWriterEnabled;
return this;
}
public int getCompressionLevel()
{
return compressionLevel;
}
@Config("hive.orc.writer.compression-level")
public OrcFileWriterConfig setCompressionLevel(int compressionLevel)
{
this.compressionLevel = compressionLevel;
return this;
}
@NotNull
public DataSize getStringStatisticsLimit()
{
return stringStatisticsLimit;
}
@Config("hive.orc.writer.string-statistics-limit")
public OrcFileWriterConfig setStringStatisticsLimit(DataSize stringStatisticsLimit)
{
this.stringStatisticsLimit = stringStatisticsLimit;
return this;
}
@NotNull
public DataSize getMaxCompressionBufferSize()
{
return maxCompressionBufferSize;
}
@Config("hive.orc.writer.max-compression-buffer-size")
public OrcFileWriterConfig setMaxCompressionBufferSize(DataSize maxCompressionBufferSize)
{
this.maxCompressionBufferSize = maxCompressionBufferSize;
return this;
}
@NotNull
public StreamLayoutType getStreamLayoutType()
{
return streamLayoutType;
}
@Config("hive.orc.writer.stream-layout-type")
public OrcFileWriterConfig setStreamLayoutType(StreamLayoutType streamLayoutType)
{
this.streamLayoutType = streamLayoutType;
return this;
}
public boolean isDwrfStripeCacheEnabled()
{
return isDwrfStripeCacheEnabled;
}
@Config("hive.orc.writer.dwrf-stripe-cache-enabled")
public OrcFileWriterConfig setDwrfStripeCacheEnabled(boolean isDwrfStripeCacheEnabled)
{
this.isDwrfStripeCacheEnabled = isDwrfStripeCacheEnabled;
return this;
}
@NotNull
public DataSize getDwrfStripeCacheMaxSize()
{
return dwrfStripeCacheMaxSize;
}
@Config("hive.orc.writer.dwrf-stripe-cache-max-size")
public OrcFileWriterConfig setDwrfStripeCacheMaxSize(DataSize dwrfStripeCacheMaxSize)
{
this.dwrfStripeCacheMaxSize = dwrfStripeCacheMaxSize;
return this;
}
@NotNull
public DwrfStripeCacheMode getDwrfStripeCacheMode()
{
return dwrfStripeCacheMode;
}
@Config("hive.orc.writer.dwrf-stripe-cache-mode")
public OrcFileWriterConfig setDwrfStripeCacheMode(DwrfStripeCacheMode dwrfStripeCacheMode)
{
this.dwrfStripeCacheMode = dwrfStripeCacheMode;
return this;
}
public boolean isAddHostnameToFileMetadataEnabled()
{
return addHostnameToFileMetadataEnabled;
}
@Config("hive.orc.writer.add-hostname-to-file-metadata-enabled")
@ConfigDescription("Add writer's hostname to the ORC/DWRF file footer. Can be used to troubleshoot file corruption issues.")
public OrcFileWriterConfig setAddHostnameToFileMetadataEnabled(boolean addHostnameToFileMetadataEnabled)
{
this.addHostnameToFileMetadataEnabled = addHostnameToFileMetadataEnabled;
return this;
}
private static StreamLayoutFactory getStreamLayoutFactory(StreamLayoutType type)
{
switch (type) {
case BY_COLUMN_SIZE:
return new StreamLayoutFactory.ColumnSizeLayoutFactory();
case BY_STREAM_SIZE:
return new StreamLayoutFactory.StreamSizeLayoutFactory();
default:
throw new RuntimeException("Unrecognized type " + type);
}
}
}