// TestHiveClientConfig.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.airlift.configuration.testing.ConfigAssertions;
import com.facebook.drift.transport.netty.codec.Protocol;
import com.facebook.presto.hive.HiveClientConfig.HdfsAuthenticationType;
import com.facebook.presto.hive.s3.S3FileSystemType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.units.DataSize;
import io.airlift.units.DataSize.Unit;
import io.airlift.units.Duration;
import org.testng.annotations.Test;
import java.time.ZoneId;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import static com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE;
import static com.facebook.presto.hive.BucketFunctionType.PRESTO_NATIVE;
import static com.facebook.presto.hive.HiveClientConfig.InsertExistingPartitionsBehavior.APPEND;
import static com.facebook.presto.hive.HiveCompressionCodec.NONE;
import static com.facebook.presto.hive.HiveCompressionCodec.SNAPPY;
import static com.facebook.presto.hive.HiveStorageFormat.DWRF;
import static com.facebook.presto.hive.HiveStorageFormat.ORC;
import static com.facebook.presto.hive.TestHiveUtil.nonDefaultTimeZone;
import static io.airlift.units.DataSize.Unit.BYTE;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
/**
 * Tests for {@link HiveClientConfig}: one test pins the recorded default of every
 * setter listed below, the other pins the mapping from configuration property names
 * to those setters.
 */
public class TestHiveClientConfig
{
    /**
     * Records the defaults of {@link HiveClientConfig} through
     * {@link ConfigAssertions#recordDefaults(Class)}, invokes each setter with the value
     * expected to be the default, and hands the recording to
     * {@link ConfigAssertions#assertRecordedDefaults(Object)}.
     */
    @Test
    public void testDefaults()
    {
        HiveClientConfig recordedDefaults = ConfigAssertions.recordDefaults(HiveClientConfig.class)
                .setTimeZone(TimeZone.getDefault().getID())
                .setMaxSplitSize(new DataSize(64, Unit.MEGABYTE))
                .setMaxPartitionsPerScan(100_000)
                .setMaxOutstandingSplits(1_000)
                .setMaxOutstandingSplitsSize(new DataSize(256, Unit.MEGABYTE))
                .setMaxSplitIteratorThreads(1_000)
                .setAllowCorruptWritesForTesting(false)
                .setMinPartitionBatchSize(10)
                .setMaxPartitionBatchSize(100)
                .setMaxInitialSplits(200)
                .setMaxInitialSplitSize(new DataSize(32, Unit.MEGABYTE))
                .setSplitLoaderConcurrency(4)
                .setDomainCompactionThreshold(100)
                .setMaxConcurrentFileRenames(20)
                .setMaxConcurrentZeroRowFileCreations(20)
                .setRecursiveDirWalkerEnabled(false)
                .setDfsTimeout(new Duration(60, TimeUnit.SECONDS))
                .setIpcPingInterval(new Duration(10, TimeUnit.SECONDS))
                .setDfsConnectTimeout(new Duration(500, TimeUnit.MILLISECONDS))
                .setDfsConnectMaxRetries(5)
                .setDomainSocketPath(null)
                .setS3FileSystemType(S3FileSystemType.PRESTO)
                .setResourceConfigFiles("")
                .setHiveStorageFormat(ORC)
                .setCompressionCodec(HiveCompressionCodec.GZIP)
                .setOrcCompressionCodec(HiveCompressionCodec.GZIP)
                .setRespectTableFormat(true)
                .setImmutablePartitions(false)
                .setInsertExistingPartitionsBehavior(APPEND)
                .setCreateEmptyBucketFiles(true)
                .setInsertOverwriteImmutablePartitionEnabled(false)
                .setFailFastOnInsertIntoImmutablePartitionsEnabled(true)
                .setSortedWritingEnabled(true)
                .setMaxPartitionsPerWriter(100)
                .setWriteValidationThreads(16)
                .setTextMaxLineLength(new DataSize(100, Unit.MEGABYTE))
                .setUseOrcColumnNames(false)
                .setAssumeCanonicalPartitionKeys(false)
                .setOrcDefaultBloomFilterFpp(0.05)
                .setRcfileOptimizedWriterEnabled(true)
                .setRcfileWriterValidate(false)
                .setHdfsAuthenticationType(HdfsAuthenticationType.NONE)
                .setHdfsImpersonationEnabled(false)
                .setSkipDeletionForAlter(false)
                .setSkipTargetCleanupOnRollback(false)
                .setBucketExecutionEnabled(true)
                .setIgnoreTableBucketing(false)
                .setMinBucketCountToNotIgnoreTableBucketing(0)
                .setMaxBucketsForGroupedExecution(1_000_000)
                .setSortedWriteToTempPathEnabled(false)
                .setSortedWriteTempPathSubdirectoryCount(10)
                .setFileSystemMaxCacheSize(1000)
                .setTableStatisticsEnabled(true)
                .setOptimizeMismatchedBucketCount(false)
                .setWritesToNonManagedTablesEnabled(false)
                .setCreatesOfNonManagedTablesEnabled(true)
                .setHdfsWireEncryptionEnabled(false)
                .setPartitionStatisticsSampleSize(100)
                .setIgnoreCorruptedStatistics(false)
                .setCollectColumnStatisticsOnWrite(false)
                .setPartitionStatisticsBasedOptimizationEnabled(false)
                .setS3SelectPushdownEnabled(false)
                .setS3SelectPushdownMaxConnections(500)
                .setOrderBasedExecutionEnabled(false)
                .setTemporaryStagingDirectoryEnabled(true)
                .setTemporaryStagingDirectoryPath("/tmp/presto-${USER}")
                .setTemporaryTableSchema("default")
                .setTemporaryTableStorageFormat(ORC)
                .setTemporaryTableCompressionCodec(SNAPPY)
                .setCreateEmptyBucketFilesForTemporaryTable(false)
                .setUsePageFileForHiveUnsupportedType(true)
                .setPushdownFilterEnabled(false)
                .setParquetPushdownFilterEnabled(false)
                .setAdaptiveFilterReorderingEnabled(true)
                .setFileStatusCacheExpireAfterWrite(new Duration(0, TimeUnit.SECONDS))
                .setFileStatusCacheMaxRetainedSize(new DataSize(0, KILOBYTE))
                .setFileStatusCacheTables("")
                .setPageFileStripeMaxSize(new DataSize(24, Unit.MEGABYTE))
                .setBucketFunctionTypeForExchange(HIVE_COMPATIBLE)
                .setBucketFunctionTypeForCteMaterialization(PRESTO_NATIVE)
                .setParquetDereferencePushdownEnabled(false)
                .setIgnoreUnreadablePartition(false)
                .setMaxMetadataUpdaterThreads(100)
                .setPartialAggregationPushdownEnabled(false)
                .setPartialAggregationPushdownForVariableLengthDatatypesEnabled(false)
                .setFileRenamingEnabled(false)
                .setPreferManifestsToListFiles(false)
                .setManifestVerificationEnabled(false)
                .setUndoMetastoreOperationsEnabled(true)
                .setOptimizedPartitionUpdateSerializationEnabled(false)
                .setVerboseRuntimeStatsEnabled(false)
                .setPartitionLeaseDuration(new Duration(0, TimeUnit.SECONDS))
                .setMaterializedViewMissingPartitionsThreshold(100)
                .setLooseMemoryAccountingEnabled(false)
                .setReadColumnIndexFilter(false)
                .setSizeBasedSplitWeightsEnabled(true)
                .setDynamicSplitSizesEnabled(false)
                .setMinimumAssignedSplitWeight(0.05)
                .setUserDefinedTypeEncodingEnabled(false)
                .setUseRecordPageSourceForCustomSplit(true)
                .setFileSplittable(true)
                .setHudiMetadataEnabled(false)
                .setHudiTablesUseMergedView(null)
                .setThriftProtocol(Protocol.BINARY)
                .setThriftBufferSize(new DataSize(128, BYTE))
                .setCopyOnFirstWriteConfigurationEnabled(false)
                .setPartitionFilteringFromMetastoreEnabled(true)
                .setParallelParsingOfPartitionValuesEnabled(false)
                .setMaxParallelParsingConcurrency(100)
                .setQuickStatsEnabled(false)
                .setQuickStatsInlineBuildTimeout(new Duration(60, TimeUnit.SECONDS))
                .setQuickStatsBackgroundBuildTimeout(new Duration(0, TimeUnit.SECONDS))
                .setQuickStatsCacheExpiry(new Duration(24, TimeUnit.HOURS))
                .setQuickStatsReaperExpiry(new Duration(5, TimeUnit.MINUTES))
                .setParquetQuickStatsFileMetadataFetchTimeout(new Duration(60, TimeUnit.SECONDS))
                .setMaxConcurrentQuickStatsCalls(100)
                .setMaxConcurrentParquetQuickStatsCalls(500)
                .setCteVirtualBucketCount(128)
                .setSkipEmptyFilesEnabled(false)
                .setOptimizeParsingOfPartitionValues(false)
                .setOptimizeParsingOfPartitionValuesThreshold(500)
                .setLegacyTimestampBucketing(false)
                .setSymlinkOptimizedReaderEnabled(true);

        ConfigAssertions.assertRecordedDefaults(recordedDefaults);
    }

    /**
     * Builds a property map in which every key carries a value different from the one
     * used in {@link #testDefaults()}, builds the {@link HiveClientConfig} expected to
     * result from it, and passes both to
     * {@link ConfigAssertions#assertFullMapping(Map, Object)}.
     */
    @Test
    public void testExplicitPropertyMappings()
    {
        Map<String, String> properties = ImmutableMap.<String, String>builder()
                .put("hive.time-zone", nonDefaultTimeZone().getID())
                .put("hive.max-split-size", "256MB")
                .put("hive.max-partitions-per-scan", "123")
                .put("hive.max-outstanding-splits", "10")
                .put("hive.max-outstanding-splits-size", "32MB")
                .put("hive.max-split-iterator-threads", "10")
                .put("hive.allow-corrupt-writes-for-testing", "true")
                .put("hive.metastore.partition-batch-size.min", "1")
                .put("hive.metastore.partition-batch-size.max", "1000")
                .put("hive.dfs.ipc-ping-interval", "34s")
                .put("hive.dfs-timeout", "33s")
                .put("hive.dfs.connect.timeout", "20s")
                .put("hive.dfs.connect.max-retries", "10")
                .put("hive.dfs.domain-socket-path", "/foo")
                .put("hive.s3-file-system-type", "EMRFS")
                .put("hive.config.resources", "/foo.xml,/bar.xml")
                .put("hive.max-initial-splits", "10")
                .put("hive.max-initial-split-size", "16MB")
                .put("hive.split-loader-concurrency", "1")
                .put("hive.domain-compaction-threshold", "42")
                .put("hive.recursive-directories", "true")
                .put("hive.storage-format", "SEQUENCEFILE")
                .put("hive.compression-codec", "NONE")
                .put("hive.orc-compression-codec", "ZSTD")
                .put("hive.respect-table-format", "false")
                .put("hive.immutable-partitions", "true")
                .put("hive.insert-existing-partitions-behavior", "OVERWRITE")
                .put("hive.create-empty-bucket-files", "false")
                .put("hive.insert-overwrite-immutable-partitions-enabled", "true")
                .put("hive.fail-fast-on-insert-into-immutable-partitions-enabled", "false")
                .put("hive.max-partitions-per-writers", "222")
                .put("hive.write-validation-threads", "11")
                .put("hive.max-concurrent-file-renames", "100")
                .put("hive.max-concurrent-zero-row-file-creations", "100")
                .put("hive.assume-canonical-partition-keys", "true")
                .put("hive.text.max-line-length", "13MB")
                .put("hive.orc.use-column-names", "true")
                .put("hive.orc.default-bloom-filter-fpp", "0.96")
                .put("hive.rcfile-optimized-writer.enabled", "false")
                .put("hive.rcfile.writer.validate", "true")
                .put("hive.hdfs.authentication.type", "KERBEROS")
                .put("hive.hdfs.impersonation.enabled", "true")
                .put("hive.skip-deletion-for-alter", "true")
                .put("hive.skip-target-cleanup-on-rollback", "true")
                .put("hive.bucket-execution", "false")
                .put("hive.sorted-writing", "false")
                .put("hive.ignore-table-bucketing", "true")
                .put("hive.min-bucket-count-to-not-ignore-table-bucketing", "1024")
                .put("hive.max-buckets-for-grouped-execution", "100")
                .put("hive.sorted-write-to-temp-path-enabled", "true")
                .put("hive.sorted-write-temp-path-subdirectory-count", "50")
                .put("hive.fs.cache.max-size", "1010")
                .put("hive.table-statistics-enabled", "false")
                .put("hive.optimize-mismatched-bucket-count", "true")
                .put("hive.non-managed-table-writes-enabled", "true")
                .put("hive.non-managed-table-creates-enabled", "false")
                .put("hive.hdfs.wire-encryption.enabled", "true")
                .put("hive.partition-statistics-sample-size", "1234")
                .put("hive.ignore-corrupted-statistics", "true")
                .put("hive.collect-column-statistics-on-write", "true")
                .put("hive.partition-statistics-based-optimization-enabled", "true")
                .put("hive.s3select-pushdown.enabled", "true")
                .put("hive.s3select-pushdown.max-connections", "1234")
                .put("hive.order-based-execution-enabled", "true")
                .put("hive.temporary-staging-directory-enabled", "false")
                .put("hive.temporary-staging-directory-path", "updated")
                .put("hive.temporary-table-schema", "other")
                .put("hive.temporary-table-storage-format", "DWRF")
                .put("hive.temporary-table-compression-codec", "NONE")
                .put("hive.create-empty-bucket-files-for-temporary-table", "true")
                .put("hive.use-pagefile-for-hive-unsupported-type", "false")
                .put("hive.pushdown-filter-enabled", "true")
                .put("hive.parquet.pushdown-filter-enabled", "true")
                .put("hive.adaptive-filter-reordering-enabled", "false")
                .put("hive.file-status-cache-tables", "foo.bar1, foo.bar2")
                .put("hive.file-status-cache.max-retained-size", "500MB")
                .put("hive.file-status-cache-expire-time", "30m")
                .put("hive.pagefile.writer.stripe-max-size", "1kB")
                .put("hive.bucket-function-type-for-exchange", "PRESTO_NATIVE")
                .put("hive.bucket-function-type-for-cte-materialization", "HIVE_COMPATIBLE")
                .put("hive.enable-parquet-dereference-pushdown", "true")
                .put("hive.ignore-unreadable-partition", "true")
                .put("hive.max-metadata-updater-threads", "1000")
                .put("hive.partial_aggregation_pushdown_enabled", "true")
                .put("hive.partial_aggregation_pushdown_for_variable_length_datatypes_enabled", "true")
                .put("hive.file_renaming_enabled", "true")
                .put("hive.prefer-manifests-to-list-files", "true")
                .put("hive.manifest-verification-enabled", "true")
                .put("hive.undo-metastore-operations-enabled", "false")
                .put("hive.experimental-optimized-partition-update-serialization-enabled", "true")
                .put("hive.partition-lease-duration", "4h")
                .put("hive.loose-memory-accounting-enabled", "true")
                .put("hive.verbose-runtime-stats-enabled", "true")
                .put("hive.materialized-view-missing-partitions-threshold", "50")
                .put("hive.parquet-column-index-filter-enabled", "true")
                .put("hive.size-based-split-weights-enabled", "false")
                .put("hive.dynamic-split-sizes-enabled", "true")
                .put("hive.user-defined-type-encoding-enabled", "true")
                .put("hive.minimum-assigned-split-weight", "1.0")
                .put("hive.use-record-page-source-for-custom-split", "false")
                .put("hive.file-splittable", "false")
                .put("hive.hudi-metadata-enabled", "true")
                .put("hive.hudi-tables-use-merged-view", "default.user")
                .put("hive.internal-communication.thrift-transport-protocol", "COMPACT")
                .put("hive.internal-communication.thrift-transport-buffer-size", "256B")
                .put("hive.copy-on-first-write-configuration-enabled", "true")
                .put("hive.partition-filtering-from-metastore-enabled", "false")
                .put("hive.parallel-parsing-of-partition-values-enabled", "true")
                .put("hive.max-parallel-parsing-concurrency", "200")
                .put("hive.quick-stats.enabled", "true")
                .put("hive.quick-stats.inline-build-timeout", "61s")
                .put("hive.quick-stats.background-build-timeout", "1s")
                .put("hive.quick-stats.cache-expiry", "5h")
                .put("hive.quick-stats.reaper-expiry", "15m")
                .put("hive.quick-stats.parquet.file-metadata-fetch-timeout", "30s")
                .put("hive.quick-stats.parquet.max-concurrent-calls", "399")
                .put("hive.quick-stats.max-concurrent-calls", "101")
                .put("hive.cte-virtual-bucket-count", "256")
                .put("hive.skip-empty-files", "true")
                .put("hive.optimize-parsing-of-partition-values-enabled", "true")
                .put("hive.optimize-parsing-of-partition-values-threshold", "100")
                .put("hive.legacy-timestamp-bucketing", "true")
                .put("hive.experimental.symlink.optimized-reader.enabled", "false")
                .build();

        // The expected config is populated through the setters directly, one per property above.
        HiveClientConfig expected = new HiveClientConfig()
                .setTimeZone(TimeZone.getTimeZone(ZoneId.of(nonDefaultTimeZone().getID())).getID())
                .setMaxSplitSize(new DataSize(256, Unit.MEGABYTE))
                .setMaxPartitionsPerScan(123)
                .setMaxOutstandingSplits(10)
                .setMaxOutstandingSplitsSize(new DataSize(32, Unit.MEGABYTE))
                .setMaxSplitIteratorThreads(10)
                .setAllowCorruptWritesForTesting(true)
                .setMinPartitionBatchSize(1)
                .setMaxPartitionBatchSize(1000)
                .setMaxInitialSplits(10)
                .setMaxInitialSplitSize(new DataSize(16, Unit.MEGABYTE))
                .setSplitLoaderConcurrency(1)
                .setDomainCompactionThreshold(42)
                .setMaxConcurrentFileRenames(100)
                .setMaxConcurrentZeroRowFileCreations(100)
                .setRecursiveDirWalkerEnabled(true)
                .setIpcPingInterval(new Duration(34, TimeUnit.SECONDS))
                .setDfsTimeout(new Duration(33, TimeUnit.SECONDS))
                .setDfsConnectTimeout(new Duration(20, TimeUnit.SECONDS))
                .setDfsConnectMaxRetries(10)
                .setResourceConfigFiles(ImmutableList.of("/foo.xml", "/bar.xml"))
                .setHiveStorageFormat(HiveStorageFormat.SEQUENCEFILE)
                .setCompressionCodec(NONE)
                .setOrcCompressionCodec(HiveCompressionCodec.ZSTD)
                .setRespectTableFormat(false)
                .setImmutablePartitions(true)
                .setCreateEmptyBucketFiles(false)
                .setInsertOverwriteImmutablePartitionEnabled(true)
                .setFailFastOnInsertIntoImmutablePartitionsEnabled(false)
                .setMaxPartitionsPerWriter(222)
                .setWriteValidationThreads(11)
                .setDomainSocketPath("/foo")
                .setS3FileSystemType(S3FileSystemType.EMRFS)
                .setTextMaxLineLength(new DataSize(13, Unit.MEGABYTE))
                .setUseOrcColumnNames(true)
                .setAssumeCanonicalPartitionKeys(true)
                .setOrcDefaultBloomFilterFpp(0.96)
                .setRcfileOptimizedWriterEnabled(false)
                .setRcfileWriterValidate(true)
                .setHdfsAuthenticationType(HdfsAuthenticationType.KERBEROS)
                .setHdfsImpersonationEnabled(true)
                .setSkipDeletionForAlter(true)
                .setSkipTargetCleanupOnRollback(true)
                .setBucketExecutionEnabled(false)
                .setSortedWritingEnabled(false)
                .setIgnoreTableBucketing(true)
                .setMaxBucketsForGroupedExecution(100)
                .setSortedWriteToTempPathEnabled(true)
                .setSortedWriteTempPathSubdirectoryCount(50)
                .setFileSystemMaxCacheSize(1010)
                .setTableStatisticsEnabled(false)
                .setOptimizeMismatchedBucketCount(true)
                .setWritesToNonManagedTablesEnabled(true)
                .setCreatesOfNonManagedTablesEnabled(false)
                .setHdfsWireEncryptionEnabled(true)
                .setPartitionStatisticsSampleSize(1234)
                .setIgnoreCorruptedStatistics(true)
                .setMinBucketCountToNotIgnoreTableBucketing(1024)
                .setCollectColumnStatisticsOnWrite(true)
                .setPartitionStatisticsBasedOptimizationEnabled(true)
                .setS3SelectPushdownEnabled(true)
                .setS3SelectPushdownMaxConnections(1234)
                .setOrderBasedExecutionEnabled(true)
                .setTemporaryStagingDirectoryEnabled(false)
                .setTemporaryStagingDirectoryPath("updated")
                .setTemporaryTableSchema("other")
                .setTemporaryTableStorageFormat(DWRF)
                .setTemporaryTableCompressionCodec(NONE)
                .setCreateEmptyBucketFilesForTemporaryTable(true)
                .setUsePageFileForHiveUnsupportedType(false)
                .setPushdownFilterEnabled(true)
                .setParquetPushdownFilterEnabled(true)
                .setAdaptiveFilterReorderingEnabled(false)
                .setFileStatusCacheTables("foo.bar1,foo.bar2")
                .setFileStatusCacheMaxRetainedSize(new DataSize(500, MEGABYTE))
                .setFileStatusCacheExpireAfterWrite(new Duration(30, TimeUnit.MINUTES))
                .setPageFileStripeMaxSize(new DataSize(1, Unit.KILOBYTE))
                .setBucketFunctionTypeForExchange(PRESTO_NATIVE)
                .setBucketFunctionTypeForCteMaterialization(HIVE_COMPATIBLE)
                .setParquetDereferencePushdownEnabled(true)
                .setIgnoreUnreadablePartition(true)
                .setMaxMetadataUpdaterThreads(1000)
                .setPartialAggregationPushdownEnabled(true)
                .setPartialAggregationPushdownForVariableLengthDatatypesEnabled(true)
                .setFileRenamingEnabled(true)
                .setPreferManifestsToListFiles(true)
                .setManifestVerificationEnabled(true)
                .setUndoMetastoreOperationsEnabled(false)
                .setOptimizedPartitionUpdateSerializationEnabled(true)
                .setVerboseRuntimeStatsEnabled(true)
                .setPartitionLeaseDuration(new Duration(4, TimeUnit.HOURS))
                .setMaterializedViewMissingPartitionsThreshold(50)
                .setLooseMemoryAccountingEnabled(true)
                .setReadColumnIndexFilter(true)
                .setSizeBasedSplitWeightsEnabled(false)
                .setDynamicSplitSizesEnabled(true)
                .setMinimumAssignedSplitWeight(1.0)
                .setUserDefinedTypeEncodingEnabled(true)
                .setUseRecordPageSourceForCustomSplit(false)
                .setFileSplittable(false)
                .setHudiMetadataEnabled(true)
                .setHudiTablesUseMergedView("default.user")
                .setThriftProtocol(Protocol.COMPACT)
                .setThriftBufferSize(new DataSize(256, BYTE))
                .setCopyOnFirstWriteConfigurationEnabled(true)
                .setPartitionFilteringFromMetastoreEnabled(false)
                .setParallelParsingOfPartitionValuesEnabled(true)
                .setMaxParallelParsingConcurrency(200)
                .setQuickStatsEnabled(true)
                .setQuickStatsInlineBuildTimeout(new Duration(61, TimeUnit.SECONDS))
                .setQuickStatsBackgroundBuildTimeout(new Duration(1, TimeUnit.SECONDS))
                .setQuickStatsCacheExpiry(new Duration(5, TimeUnit.HOURS))
                .setQuickStatsReaperExpiry(new Duration(15, TimeUnit.MINUTES))
                .setParquetQuickStatsFileMetadataFetchTimeout(new Duration(30, TimeUnit.SECONDS))
                .setMaxConcurrentParquetQuickStatsCalls(399)
                .setMaxConcurrentQuickStatsCalls(101)
                .setSkipEmptyFilesEnabled(true)
                .setCteVirtualBucketCount(256)
                .setOptimizeParsingOfPartitionValues(true)
                .setOptimizeParsingOfPartitionValuesThreshold(100)
                .setLegacyTimestampBucketing(true)
                .setSymlinkOptimizedReaderEnabled(false);

        ConfigAssertions.assertFullMapping(properties, expected);
    }
}