/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.airlift.configuration.Config;
import com.facebook.airlift.configuration.ConfigDescription;
import com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode;
import com.facebook.presto.spi.schedule.NodeSelectionStrategy;
import io.airlift.units.DataSize;
import javax.validation.constraints.DecimalMax;
import javax.validation.constraints.DecimalMin;
import javax.validation.constraints.NotNull;
import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
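/**
 * Configuration shared by Hive-compatible connectors: ORC and Parquet
 * reader/writer tuning, node selection for split scheduling, and the
 * metastore catalog name. Each setter is bound to a configuration property
 * via airlift's {@code @Config} annotation; a module would typically
 * register this class with
 * {@code configBinder(binder).bindConfig(HiveCommonClientConfig.class)}.
 *
 * A minimal sketch of how a few of these properties might appear in a
 * catalog properties file (property names are taken from the annotations
 * below; the values are illustrative, not recommendations):
 * <pre>
 * hive.node-selection-strategy=SOFT_AFFINITY
 * hive.orc.max-read-block-size=16MB
 * hive.parquet.use-column-names=true
 * </pre>
 */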
public class HiveCommonClientConfig
{
private NodeSelectionStrategy nodeSelectionStrategy = NO_PREFERENCE;
private boolean orcBloomFiltersEnabled;
private boolean orcLazyReadSmallRanges = true;
private DataSize orcMaxBufferSize = new DataSize(8, MEGABYTE);
private DataSize orcMaxMergeDistance = new DataSize(1, MEGABYTE);
private DataSize orcMaxReadBlockSize = new DataSize(16, MEGABYTE);
private boolean orcOptimizedWriterEnabled = true;
private DataSize orcStreamBufferSize = new DataSize(8, MEGABYTE);
private OrcWriteValidationMode orcWriterValidationMode = OrcWriteValidationMode.BOTH;
private double orcWriterValidationPercentage;
private DataSize orcTinyStripeThreshold = new DataSize(8, MEGABYTE);
private boolean parquetBatchReadOptimizationEnabled;
private boolean parquetEnableBatchReaderVerification;
private DataSize parquetMaxReadBlockSize = new DataSize(16, MEGABYTE);
private boolean rangeFiltersOnSubscriptsEnabled;
private boolean readNullMaskedParquetEncryptedValueEnabled;
private boolean useParquetColumnNames;
private boolean zstdJniDecompressionEnabled;
private String catalogName;
private DataSize affinitySchedulingFileSectionSize = new DataSize(256, MEGABYTE);
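// NodeSelectionStrategy is the SPI enum controlling how splits are assigned
// to workers: NO_PREFERENCE (any node), SOFT_AFFINITY (prefer the node that
// likely has the data cached, fall back to any node), or HARD_AFFINITY
// (require a specific node). This is a summary; see NodeSelectionStrategy
// for the authoritative definitions.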
public NodeSelectionStrategy getNodeSelectionStrategy()
{
return nodeSelectionStrategy;
}
@Config("hive.node-selection-strategy")
public HiveCommonClientConfig setNodeSelectionStrategy(NodeSelectionStrategy nodeSelectionStrategy)
{
this.nodeSelectionStrategy = nodeSelectionStrategy;
return this;
}
public boolean isOrcBloomFiltersEnabled()
{
return orcBloomFiltersEnabled;
}
@Config("hive.orc.bloom-filters.enabled")
public HiveCommonClientConfig setOrcBloomFiltersEnabled(boolean orcBloomFiltersEnabled)
{
this.orcBloomFiltersEnabled = orcBloomFiltersEnabled;
return this;
}
@Deprecated
public boolean isOrcLazyReadSmallRanges()
{
return orcLazyReadSmallRanges;
}
// TODO remove config option once efficacy is proven
@Deprecated
@Config("hive.orc.lazy-read-small-ranges")
@ConfigDescription("ORC read small disk ranges lazily")
public HiveCommonClientConfig setOrcLazyReadSmallRanges(boolean orcLazyReadSmallRanges)
{
this.orcLazyReadSmallRanges = orcLazyReadSmallRanges;
return this;
}
@NotNull
public DataSize getOrcMaxBufferSize()
{
return orcMaxBufferSize;
}
@Config("hive.orc.max-buffer-size")
public HiveCommonClientConfig setOrcMaxBufferSize(DataSize orcMaxBufferSize)
{
this.orcMaxBufferSize = orcMaxBufferSize;
return this;
}
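// Note: per the documented behavior of hive.orc.max-merge-distance, two
// adjacent ORC disk reads separated by a gap no larger than this distance
// may be merged into a single read.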
@NotNull
public DataSize getOrcMaxMergeDistance()
{
return orcMaxMergeDistance;
}
@Config("hive.orc.max-merge-distance")
public HiveCommonClientConfig setOrcMaxMergeDistance(DataSize orcMaxMergeDistance)
{
this.orcMaxMergeDistance = orcMaxMergeDistance;
return this;
}
@NotNull
public DataSize getOrcMaxReadBlockSize()
{
return orcMaxReadBlockSize;
}
@Config("hive.orc.max-read-block-size")
public HiveCommonClientConfig setOrcMaxReadBlockSize(DataSize orcMaxReadBlockSize)
{
this.orcMaxReadBlockSize = orcMaxReadBlockSize;
return this;
}
@Deprecated
public boolean isOrcOptimizedWriterEnabled()
{
return orcOptimizedWriterEnabled;
}
@Deprecated
@Config("hive.orc.optimized-writer.enabled")
public HiveCommonClientConfig setOrcOptimizedWriterEnabled(boolean orcOptimizedWriterEnabled)
{
this.orcOptimizedWriterEnabled = orcOptimizedWriterEnabled;
return this;
}
@NotNull
public DataSize getOrcStreamBufferSize()
{
return orcStreamBufferSize;
}
@Config("hive.orc.stream-buffer-size")
public HiveCommonClientConfig setOrcStreamBufferSize(DataSize orcStreamBufferSize)
{
this.orcStreamBufferSize = orcStreamBufferSize;
return this;
}
@NotNull
public OrcWriteValidationMode getOrcWriterValidationMode()
{
return orcWriterValidationMode;
}
@Config("hive.orc.writer.validation-mode")
@ConfigDescription("Level of detail in ORC validation. Lower levels require more memory.")
public HiveCommonClientConfig setOrcWriterValidationMode(OrcWriteValidationMode orcWriterValidationMode)
{
this.orcWriterValidationMode = orcWriterValidationMode;
return this;
}
@DecimalMin("0.0")
@DecimalMax("100.0")
public double getOrcWriterValidationPercentage()
{
return orcWriterValidationPercentage;
}
@Config("hive.orc.writer.validation-percentage")
@ConfigDescription("Percentage of ORC files to validate after write by re-reading the whole file")
public HiveCommonClientConfig setOrcWriterValidationPercentage(double orcWriterValidationPercentage)
{
this.orcWriterValidationPercentage = orcWriterValidationPercentage;
return this;
}
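// Note: per the documented behavior of hive.orc.tiny-stripe-threshold, an
// ORC stripe (or whole file) below this threshold is read in its entirety
// with a single IO instead of separate range reads.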
@NotNull
public DataSize getOrcTinyStripeThreshold()
{
return orcTinyStripeThreshold;
}
@Config("hive.orc.tiny-stripe-threshold")
public HiveCommonClientConfig setOrcTinyStripeThreshold(DataSize orcTinyStripeThreshold)
{
this.orcTinyStripeThreshold = orcTinyStripeThreshold;
return this;
}
@Config("hive.parquet-batch-read-optimization-enabled")
@ConfigDescription("enable parquet batch reads optimization")
public HiveCommonClientConfig setParquetBatchReadOptimizationEnabled(boolean parquetBatchReadOptimizationEnabled)
{
this.parquetBatchReadOptimizationEnabled = parquetBatchReadOptimizationEnabled;
return this;
}
public boolean isParquetBatchReadOptimizationEnabled()
{
return this.parquetBatchReadOptimizationEnabled;
}
@Config("hive.enable-parquet-batch-reader-verification")
@ConfigDescription("enable optimized parquet reader")
public HiveCommonClientConfig setParquetBatchReaderVerificationEnabled(boolean parquetEnableBatchReaderVerification)
{
this.parquetEnableBatchReaderVerification = parquetEnableBatchReaderVerification;
return this;
}
public boolean isParquetBatchReaderVerificationEnabled()
{
return this.parquetEnableBatchReaderVerification;
}
@NotNull
public DataSize getParquetMaxReadBlockSize()
{
return parquetMaxReadBlockSize;
}
@Config("hive.parquet.max-read-block-size")
public HiveCommonClientConfig setParquetMaxReadBlockSize(DataSize parquetMaxReadBlockSize)
{
this.parquetMaxReadBlockSize = parquetMaxReadBlockSize;
return this;
}
public boolean isRangeFiltersOnSubscriptsEnabled()
{
return rangeFiltersOnSubscriptsEnabled;
}
@Config("hive.range-filters-on-subscripts-enabled")
@ConfigDescription("Experimental: enable pushdown of range filters on subscripts (a[2] = 5) into ORC column readers")
public HiveCommonClientConfig setRangeFiltersOnSubscriptsEnabled(boolean rangeFiltersOnSubscriptsEnabled)
{
this.rangeFiltersOnSubscriptsEnabled = rangeFiltersOnSubscriptsEnabled;
return this;
}
@Config("hive.read-null-masked-parquet-encrypted-value-enabled")
@ConfigDescription("Read null masked value when access is denied for an encrypted parquet column")
public HiveCommonClientConfig setReadNullMaskedParquetEncryptedValue(boolean readNullMaskedParquetEncryptedValueEnabled)
{
this.readNullMaskedParquetEncryptedValueEnabled = readNullMaskedParquetEncryptedValueEnabled;
return this;
}
public boolean getReadNullMaskedParquetEncryptedValue()
{
return this.readNullMaskedParquetEncryptedValueEnabled;
}
public boolean isUseParquetColumnNames()
{
return useParquetColumnNames;
}
@Config("hive.parquet.use-column-names")
@ConfigDescription("Access Parquet columns using names from the file")
public HiveCommonClientConfig setUseParquetColumnNames(boolean useParquetColumnNames)
{
this.useParquetColumnNames = useParquetColumnNames;
return this;
}
public boolean isZstdJniDecompressionEnabled()
{
return zstdJniDecompressionEnabled;
}
@Config("hive.zstd-jni-decompression-enabled")
public HiveCommonClientConfig setZstdJniDecompressionEnabled(boolean zstdJniDecompressionEnabled)
{
this.zstdJniDecompressionEnabled = zstdJniDecompressionEnabled;
return this;
}
public String getCatalogName()
{
return catalogName;
}
@Config("hive.metastore.catalog.name")
@ConfigDescription("Specified property to store the metastore catalog name.")
public HiveCommonClientConfig setCatalogName(String catalogName)
{
this.catalogName = catalogName;
return this;
}
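// Interpretation (not stated in this file): under affinity scheduling, a
// file is divided into sections of this size and each section is mapped to
// a preferred node, so larger sections yield fewer distinct node
// assignments per file.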
@NotNull
public DataSize getAffinitySchedulingFileSectionSize()
{
return affinitySchedulingFileSectionSize;
}
@Config("hive.affinity-scheduling-file-section-size")
public HiveCommonClientConfig setAffinitySchedulingFileSectionSize(DataSize affinitySchedulingFileSectionSize)
{
this.affinitySchedulingFileSectionSize = affinitySchedulingFileSectionSize;
return this;
}
}