TestOrcWriterOptions.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc;
import com.facebook.presto.orc.metadata.DwrfStripeCacheMode;
import com.facebook.presto.orc.writer.StreamLayoutFactory;
import com.facebook.presto.orc.writer.StreamLayoutFactory.ColumnSizeLayoutFactory;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.airlift.units.DataSize;
import org.testng.annotations.Test;
import java.util.Optional;
import java.util.OptionalInt;
import static com.facebook.presto.orc.metadata.DwrfStripeCacheMode.INDEX_AND_FOOTER;
import static io.airlift.units.DataSize.Unit.BYTE;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.lang.Math.toIntExact;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
/**
 * Tests for {@link OrcWriterOptions} and its builder: default values, round-tripping of
 * explicitly configured properties, DWRF stripe cache handling, and the {@code toString()} format.
 *
 * <p>All assertions use TestNG's {@code assertEquals(actual, expected)} argument order so that
 * failure messages label the two values correctly.
 */
public class TestOrcWriterOptions
{
    private static final DwrfStripeCacheMode DWRF_STRIPE_CACHE_MODE = INDEX_AND_FOOTER;
    private static final DataSize DWRF_STRIPE_CACHE_MAX_SIZE = new DataSize(27, MEGABYTE);

    /**
     * Verifies that the DWRF stripe cache options are present exactly when the cache is enabled,
     * and that the configured mode and max size are carried through when it is.
     */
    @Test
    public void testDwrfWriterOptionsProperties()
    {
        for (boolean enabled : ImmutableList.of(true, false)) {
            OrcWriterOptions options = OrcWriterOptions.builder()
                    .withDwrfStripeCacheEnabled(enabled)
                    .withDwrfStripeCacheMode(DWRF_STRIPE_CACHE_MODE)
                    .withDwrfStripeCacheMaxSize(DWRF_STRIPE_CACHE_MAX_SIZE)
                    .build();

            assertEquals(options.getDwrfStripeCacheOptions().isPresent(), enabled);
            if (enabled) {
                DwrfStripeCacheOptions dwrfStripeCacheOptions = options.getDwrfStripeCacheOptions().get();
                assertEquals(dwrfStripeCacheOptions.getStripeCacheMode(), DWRF_STRIPE_CACHE_MODE);
                assertEquals(dwrfStripeCacheOptions.getStripeCacheMaxSize(), DWRF_STRIPE_CACHE_MAX_SIZE);
            }
            else {
                assertEquals(options.getDwrfStripeCacheOptions(), Optional.empty());
            }
        }
    }

    /**
     * Verifies the values reported by an options object built without any explicit configuration:
     * no flattened columns, map statistics disabled, and a max flattened map key count of 25000.
     */
    @Test
    public void tesDefaultValues()
    {
        OrcWriterOptions options = OrcWriterOptions.builder().build();

        assertEquals(options.getFlattenedColumns(), ImmutableSet.of());
        assertFalse(options.isMapStatisticsEnabled());
        assertEquals(options.getMaxFlattenedMapKeyCount(), 25000);
    }

    /**
     * Sets every property checked below to a distinctive value through the builder and verifies
     * that each getter returns the value that was set. The DWRF stripe cache is not enabled here,
     * so its options are expected to be absent.
     */
    @Test
    public void testProperties()
    {
        DataSize stripeMinSize = new DataSize(13, MEGABYTE);
        DataSize stripeMaxSize = new DataSize(27, MEGABYTE);
        int stripeMaxRowCount = 1_100_000;
        int rowGroupMaxRowCount = 15_000;
        DataSize dictionaryMaxMemory = new DataSize(13_000, KILOBYTE);
        DataSize dictionaryMemoryRange = new DataSize(1_000, KILOBYTE);
        int dictionaryUsefulCheckPerChunkFrequency = 9_999;
        DataSize dictionaryUsefulCheckColumnSize = new DataSize(1, MEGABYTE);
        DataSize stringMaxStatisticsLimit = new DataSize(128, BYTE);
        DataSize maxCompressionBufferSize = new DataSize(512, KILOBYTE);
        OptionalInt compressionLevel = OptionalInt.of(5);
        StreamLayoutFactory streamLayoutFactory = new StreamLayoutFactory.StreamSizeLayoutFactory();
        boolean integerDictionaryEncodingEnabled = true;
        boolean stringDictionarySortingEnabled = false;
        boolean stringDictionaryEncodingEnabled = false;
        int preserveDirectEncodingStripeCount = 10;
        boolean mapStatisticsEnabled = true;
        int maxFlattenedMapKeyCount = 27;

        OrcWriterOptions.Builder builder = OrcWriterOptions.builder()
                .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                        .withStripeMinSize(stripeMinSize)
                        .withStripeMaxSize(stripeMaxSize)
                        .withStripeMaxRowCount(stripeMaxRowCount)
                        .build())
                .withRowGroupMaxRowCount(rowGroupMaxRowCount)
                .withDictionaryMaxMemory(dictionaryMaxMemory)
                .withDictionaryMemoryAlmostFullRange(dictionaryMemoryRange)
                .withDictionaryUsefulCheckPerChunkFrequency(dictionaryUsefulCheckPerChunkFrequency)
                .withDictionaryUsefulCheckColumnSize(dictionaryUsefulCheckColumnSize)
                .withMaxStringStatisticsLimit(stringMaxStatisticsLimit)
                .withMaxCompressionBufferSize(maxCompressionBufferSize)
                .withCompressionLevel(compressionLevel)
                .withStreamLayoutFactory(streamLayoutFactory)
                .withIntegerDictionaryEncodingEnabled(integerDictionaryEncodingEnabled)
                .withStringDictionarySortingEnabled(stringDictionarySortingEnabled)
                .withStringDictionaryEncodingEnabled(stringDictionaryEncodingEnabled)
                .withPreserveDirectEncodingStripeCount(preserveDirectEncodingStripeCount)
                .withFlattenedColumns(ImmutableSet.of(4, 3))
                .withMapStatisticsEnabled(mapStatisticsEnabled)
                .withMaxFlattenedMapKeyCount(maxFlattenedMapKeyCount);

        OrcWriterOptions options = builder.build();

        // The flush policy reports sizes in bytes, so the configured DataSize values are converted.
        assertEquals(options.getFlushPolicy().getStripeMinBytes(), toIntExact(stripeMinSize.toBytes()));
        assertEquals(options.getFlushPolicy().getStripeMaxBytes(), toIntExact(stripeMaxSize.toBytes()));
        assertEquals(options.getFlushPolicy().getStripeMaxRowCount(), stripeMaxRowCount);
        assertEquals(options.getRowGroupMaxRowCount(), rowGroupMaxRowCount);
        assertEquals(options.getDictionaryMaxMemory(), dictionaryMaxMemory);
        assertEquals(options.getDictionaryMemoryAlmostFullRange(), dictionaryMemoryRange);
        assertEquals(options.getDictionaryUsefulCheckPerChunkFrequency(), dictionaryUsefulCheckPerChunkFrequency);
        assertEquals(options.getDictionaryUsefulCheckColumnSize(), dictionaryUsefulCheckColumnSize);
        assertEquals(options.getMaxStringStatisticsLimit(), stringMaxStatisticsLimit);
        assertEquals(options.getMaxCompressionBufferSize(), maxCompressionBufferSize);
        assertEquals(options.getCompressionLevel(), compressionLevel);
        assertEquals(options.getStreamLayoutFactory(), streamLayoutFactory);
        assertEquals(options.isIntegerDictionaryEncodingEnabled(), integerDictionaryEncodingEnabled);
        assertEquals(options.isStringDictionarySortingEnabled(), stringDictionarySortingEnabled);
        assertEquals(options.isStringDictionaryEncodingEnabled(), stringDictionaryEncodingEnabled);
        assertEquals(options.getDwrfStripeCacheOptions(), Optional.empty());
        assertEquals(options.getPreserveDirectEncodingStripeCount(), preserveDirectEncodingStripeCount);
        assertEquals(options.getFlattenedColumns(), ImmutableSet.of(4, 3));
        assertEquals(options.isMapStatisticsEnabled(), mapStatisticsEnabled);
        assertEquals(options.getMaxFlattenedMapKeyCount(), maxFlattenedMapKeyCount);
    }

    /**
     * Verifies the exact {@code toString()} output of a fully configured options object.
     *
     * <p>{@code stringDictionaryEncodingEnabled} and {@code ignoreDictionaryRowGroupSizes} are not
     * set through the builder in this test; the expected string pins the values they are reported
     * with in that case ({@code true} and {@code false} respectively).
     */
    @Test
    public void testToString()
    {
        DataSize stripeMinSize = new DataSize(13, MEGABYTE);
        DataSize stripeMaxSize = new DataSize(27, MEGABYTE);
        int stripeMaxRowCount = 1_100_000;
        int rowGroupMaxRowCount = 15_000;
        DataSize dictionaryMaxMemory = new DataSize(13_000, KILOBYTE);
        DataSize dictionaryMemoryRange = new DataSize(1_000, KILOBYTE);
        int dictionaryUsefulCheckPerChunkFrequency = 9_999;
        DataSize dictionaryUsefulCheckColumnSize = new DataSize(1, MEGABYTE);
        DataSize stringMaxStatisticsLimit = new DataSize(128, BYTE);
        DataSize maxCompressionBufferSize = new DataSize(512, KILOBYTE);
        DataSize dwrfStripeCacheMaxSize = new DataSize(4, MEGABYTE);
        DwrfStripeCacheMode dwrfStripeCacheMode = INDEX_AND_FOOTER;
        OptionalInt compressionLevel = OptionalInt.of(5);
        StreamLayoutFactory streamLayoutFactory = new ColumnSizeLayoutFactory();
        boolean integerDictionaryEncodingEnabled = false;
        boolean stringDictionarySortingEnabled = true;
        int preserveDirectEncodingStripeCount = 0;
        boolean mapStatisticsEnabled = true;
        int maxFlattenedMapKeyCount = 27;
        boolean resetOutputBuffer = false;
        boolean lazyOutputBuffer = false;

        OrcWriterOptions writerOptions = OrcWriterOptions.builder()
                .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                        .withStripeMinSize(stripeMinSize)
                        .withStripeMaxSize(stripeMaxSize)
                        .withStripeMaxRowCount(stripeMaxRowCount)
                        .build())
                .withRowGroupMaxRowCount(rowGroupMaxRowCount)
                .withDictionaryMaxMemory(dictionaryMaxMemory)
                .withDictionaryMemoryAlmostFullRange(dictionaryMemoryRange)
                .withDictionaryUsefulCheckPerChunkFrequency(dictionaryUsefulCheckPerChunkFrequency)
                .withDictionaryUsefulCheckColumnSize(dictionaryUsefulCheckColumnSize)
                .withMaxStringStatisticsLimit(stringMaxStatisticsLimit)
                .withMaxCompressionBufferSize(maxCompressionBufferSize)
                .withCompressionLevel(compressionLevel)
                .withStreamLayoutFactory(streamLayoutFactory)
                .withIntegerDictionaryEncodingEnabled(integerDictionaryEncodingEnabled)
                .withStringDictionarySortingEnabled(stringDictionarySortingEnabled)
                .withDwrfStripeCacheEnabled(true)
                .withDwrfStripeCacheMaxSize(dwrfStripeCacheMaxSize)
                .withDwrfStripeCacheMode(dwrfStripeCacheMode)
                .withPreserveDirectEncodingStripeCount(preserveDirectEncodingStripeCount)
                .withFlattenedColumns(ImmutableSet.of(4))
                .withMapStatisticsEnabled(mapStatisticsEnabled)
                .withMaxFlattenedMapKeyCount(maxFlattenedMapKeyCount)
                .withResetOutputBuffer(resetOutputBuffer)
                .withLazyOutputBuffer(lazyOutputBuffer)
                .build();

        String expectedString = "OrcWriterOptions{flushPolicy=DefaultOrcWriterFlushPolicy{stripeMaxRowCount=1100000, " +
                "stripeMinBytes=13631488, stripeMaxBytes=28311552}, rowGroupMaxRowCount=15000, " +
                "dictionaryMaxMemory=13000kB, dictionaryMemoryAlmostFullRange=1000kB, dictionaryUsefulCheckPerChunkFrequency=9999, " +
                "dictionaryUsefulCheckColumnSize=1MB, maxStringStatisticsLimit=128B, maxCompressionBufferSize=512kB, " +
                "compressionLevel=OptionalInt[5], streamLayoutFactory=ColumnSizeLayoutFactory{}, integerDictionaryEncodingEnabled=false, " +
                "stringDictionarySortingEnabled=true, stringDictionaryEncodingEnabled=true, " +
                "dwrfWriterOptions=Optional[DwrfStripeCacheOptions{stripeCacheMode=INDEX_AND_FOOTER, stripeCacheMaxSize=4MB}], " +
                "ignoreDictionaryRowGroupSizes=false, preserveDirectEncodingStripeCount=0, flattenedColumns=[4], mapStatisticsEnabled=true, " +
                "maxFlattenedMapKeyCount=27, resetOutputBuffer=false, lazyOutputBuffer=false}";
        assertEquals(writerOptions.toString(), expectedString);
    }
}