// TestOrcFileWriterConfig.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.metadata.DwrfStripeCacheMode;
import com.facebook.presto.orc.writer.StreamLayoutFactory.ColumnSizeLayoutFactory;
import com.facebook.presto.orc.writer.StreamLayoutFactory.StreamSizeLayoutFactory;
import com.google.common.collect.ImmutableMap;
import io.airlift.units.DataSize;
import org.testng.annotations.Test;

import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;

import static com.facebook.airlift.configuration.testing.ConfigAssertions.assertFullMapping;
import static com.facebook.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults;
import static com.facebook.airlift.configuration.testing.ConfigAssertions.recordDefaults;
import static com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType.BY_COLUMN_SIZE;
import static com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType.BY_STREAM_SIZE;
import static com.facebook.presto.orc.metadata.DwrfStripeCacheMode.FOOTER;
import static com.facebook.presto.orc.metadata.DwrfStripeCacheMode.INDEX;
import static com.facebook.presto.orc.metadata.DwrfStripeCacheMode.INDEX_AND_FOOTER;
import static io.airlift.units.DataSize.Unit.BYTE;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.lang.Math.toIntExact;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotSame;
import static org.testng.Assert.assertTrue;

/**
 * Tests for {@link OrcFileWriterConfig}: its recorded defaults, its mapping from
 * {@code hive.orc.writer.*} properties, and its conversion into {@link OrcWriterOptions}.
 *
 * <p>Assertions follow the TestNG argument order {@code assertEquals(actual, expected)}
 * so that failure messages label the two values correctly.
 */
public class TestOrcFileWriterConfig
{
    /**
     * Checks that a freshly constructed config reports the default values listed here.
     */
    @Test
    public void testDefaults()
    {
        assertRecordedDefaults(recordDefaults(OrcFileWriterConfig.class)
                .setStripeMinSize(new DataSize(32, MEGABYTE))
                .setStripeMaxSize(new DataSize(64, MEGABYTE))
                .setStripeMaxRowCount(10_000_000)
                .setRowGroupMaxRowCount(10_000)
                .setDictionaryMaxMemory(new DataSize(16, MEGABYTE))
                .setStringStatisticsLimit(new DataSize(64, BYTE))
                .setMaxCompressionBufferSize(new DataSize(256, KILOBYTE))
                .setStreamLayoutType(BY_COLUMN_SIZE)
                .setDwrfStripeCacheEnabled(true)
                .setDwrfStripeCacheMaxSize(new DataSize(8, MEGABYTE))
                .setDwrfStripeCacheMode(INDEX_AND_FOOTER)
                .setCompressionLevel(Integer.MIN_VALUE)
                .setIntegerDictionaryEncodingEnabled(false)
                .setStringDictionaryEncodingEnabled(true)
                .setStringDictionarySortingEnabled(true)
                .setFlatMapWriterEnabled(false)
                .setAddHostnameToFileMetadataEnabled(true));
    }

    /**
     * Checks that each {@code hive.orc.writer.*} property is bound to the matching setter.
     * Every value below differs from the corresponding default in {@link #testDefaults()}.
     */
    @Test
    public void testExplicitPropertyMappings()
    {
        Map<String, String> properties = new ImmutableMap.Builder<String, String>()
                .put("hive.orc.writer.stripe-min-size", "13MB")
                .put("hive.orc.writer.stripe-max-size", "27MB")
                .put("hive.orc.writer.stripe-max-rows", "44")
                .put("hive.orc.writer.row-group-max-rows", "11")
                .put("hive.orc.writer.dictionary-max-memory", "13MB")
                .put("hive.orc.writer.string-statistics-limit", "17MB")
                .put("hive.orc.writer.max-compression-buffer-size", "19MB")
                .put("hive.orc.writer.stream-layout-type", "BY_STREAM_SIZE")
                .put("hive.orc.writer.dwrf-stripe-cache-enabled", "false")
                .put("hive.orc.writer.dwrf-stripe-cache-max-size", "10MB")
                .put("hive.orc.writer.dwrf-stripe-cache-mode", "FOOTER")
                .put("hive.orc.writer.compression-level", "5")
                .put("hive.orc.writer.integer-dictionary-encoding-enabled", "true")
                .put("hive.orc.writer.string-dictionary-encoding-enabled", "false")
                .put("hive.orc.writer.string-dictionary-sorting-enabled", "false")
                .put("hive.orc.writer.flat-map-writer-enabled", "true")
                .put("hive.orc.writer.add-hostname-to-file-metadata-enabled", "false")
                .build();

        OrcFileWriterConfig expected = new OrcFileWriterConfig()
                .setStripeMinSize(new DataSize(13, MEGABYTE))
                .setStripeMaxSize(new DataSize(27, MEGABYTE))
                .setStripeMaxRowCount(44)
                .setRowGroupMaxRowCount(11)
                .setDictionaryMaxMemory(new DataSize(13, MEGABYTE))
                .setStringStatisticsLimit(new DataSize(17, MEGABYTE))
                .setMaxCompressionBufferSize(new DataSize(19, MEGABYTE))
                .setStreamLayoutType(BY_STREAM_SIZE)
                .setDwrfStripeCacheEnabled(false)
                .setDwrfStripeCacheMaxSize(new DataSize(10, MEGABYTE))
                .setDwrfStripeCacheMode(FOOTER)
                .setCompressionLevel(5)
                .setIntegerDictionaryEncodingEnabled(true)
                .setStringDictionaryEncodingEnabled(false)
                .setStringDictionarySortingEnabled(false)
                .setFlatMapWriterEnabled(true)
                .setAddHostnameToFileMetadataEnabled(false);

        assertFullMapping(properties, expected);
    }

    /**
     * Checks that an untouched config can be converted to {@link OrcWriterOptions}.
     * The test passes as long as the conversion does not throw.
     */
    @Test
    public void testWithNoOptionsSet()
    {
        OrcFileWriterConfig config = new OrcFileWriterConfig();
        // should succeed.
        config.toOrcWriterOptionsBuilder().build();
    }

    /**
     * Sets non-default values on the config, checks that the getters return them, and
     * checks that the {@link OrcWriterOptions} built from the config carry the same values.
     */
    @Test
    public void testOrcWriterOptionsBuilder()
    {
        DataSize stripeMinSize = new DataSize(10, MEGABYTE);
        DataSize stripeMaxSize = new DataSize(50, MEGABYTE);
        int stripeMaxRowCount = 1_000_000;
        int rowGroupMaxRowCount = 15_000;
        DataSize dictionaryMaxMemory = new DataSize(20, MEGABYTE);
        DataSize stringStatisticsLimit = new DataSize(32, BYTE);
        DataSize maxCompressionBufferSize = new DataSize(512, KILOBYTE);
        StreamLayoutType streamLayoutType = BY_STREAM_SIZE;
        DataSize dwrfStripeCacheMaxSize = new DataSize(4, MEGABYTE);
        DwrfStripeCacheMode dwrfStripeCacheMode = INDEX;
        int compressionLevel = 5;
        boolean flatMapWriterEnabled = true;

        OrcFileWriterConfig config = new OrcFileWriterConfig()
                .setStripeMinSize(stripeMinSize)
                .setStripeMaxSize(stripeMaxSize)
                .setStripeMaxRowCount(stripeMaxRowCount)
                .setRowGroupMaxRowCount(rowGroupMaxRowCount)
                .setDictionaryMaxMemory(dictionaryMaxMemory)
                .setStringStatisticsLimit(stringStatisticsLimit)
                .setMaxCompressionBufferSize(maxCompressionBufferSize)
                .setStreamLayoutType(streamLayoutType)
                .setDwrfStripeCacheEnabled(false)
                .setDwrfStripeCacheMaxSize(dwrfStripeCacheMaxSize)
                .setDwrfStripeCacheMode(dwrfStripeCacheMode)
                // Use the local so the value set here and the value asserted below cannot drift apart.
                .setCompressionLevel(compressionLevel)
                .setFlatMapWriterEnabled(flatMapWriterEnabled);

        // The config getters must return exactly what was set.
        assertEquals(config.getStripeMinSize(), stripeMinSize);
        assertEquals(config.getStripeMaxSize(), stripeMaxSize);
        assertEquals(config.getStripeMaxRowCount(), stripeMaxRowCount);
        assertEquals(config.getRowGroupMaxRowCount(), rowGroupMaxRowCount);
        assertEquals(config.getDictionaryMaxMemory(), dictionaryMaxMemory);
        assertEquals(config.getStringStatisticsLimit(), stringStatisticsLimit);
        assertEquals(config.getMaxCompressionBufferSize(), maxCompressionBufferSize);
        assertEquals(config.getStreamLayoutType(), streamLayoutType);
        assertFalse(config.isDwrfStripeCacheEnabled());
        assertEquals(config.getDwrfStripeCacheMaxSize(), dwrfStripeCacheMaxSize);
        assertEquals(config.getDwrfStripeCacheMode(), dwrfStripeCacheMode);
        assertEquals(config.getCompressionLevel(), compressionLevel);
        assertEquals(config.isFlatMapWriterEnabled(), flatMapWriterEnabled);

        // Each call must hand back a distinct builder instance.
        assertNotSame(config.toOrcWriterOptionsBuilder(), config.toOrcWriterOptionsBuilder());
        OrcWriterOptions options = config.toOrcWriterOptionsBuilder().build();

        // The built options must reflect the config values.
        assertEquals(options.getFlushPolicy().getStripeMinBytes(), toIntExact(stripeMinSize.toBytes()));
        assertEquals(options.getFlushPolicy().getStripeMaxBytes(), toIntExact(stripeMaxSize.toBytes()));
        assertEquals(options.getFlushPolicy().getStripeMaxRowCount(), stripeMaxRowCount);
        assertEquals(options.getRowGroupMaxRowCount(), rowGroupMaxRowCount);
        assertEquals(options.getDictionaryMaxMemory(), dictionaryMaxMemory);
        assertEquals(options.getMaxStringStatisticsLimit(), stringStatisticsLimit);
        assertEquals(options.getMaxCompressionBufferSize(), maxCompressionBufferSize);
        assertTrue(options.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory);
        // The stripe cache was disabled above, so no cache options are expected
        // even though a max size and mode were configured.
        assertEquals(options.getDwrfStripeCacheOptions(), Optional.empty());
        assertEquals(options.getCompressionLevel(), OptionalInt.of(compressionLevel));
    }

    /**
     * Checks that each {@link StreamLayoutType} produces the matching stream layout factory.
     */
    @Test
    public void testStreamLayoutOption()
    {
        OrcFileWriterConfig config = new OrcFileWriterConfig();

        config.setStreamLayoutType(BY_STREAM_SIZE);
        OrcWriterOptions options = config.toOrcWriterOptionsBuilder().build();
        assertTrue(options.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory);

        config.setStreamLayoutType(BY_COLUMN_SIZE);
        options = config.toOrcWriterOptionsBuilder().build();
        assertTrue(options.getStreamLayoutFactory() instanceof ColumnSizeLayoutFactory);
    }

    /**
     * Checks that a config with no compression level set yields options whose
     * compression level is empty.
     */
    @Test
    public void testDefaultCompressionLevel()
    {
        OrcFileWriterConfig config = new OrcFileWriterConfig();
        OrcWriterOptions options = config.toOrcWriterOptionsBuilder().build();

        assertEquals(options.getCompressionLevel(), OptionalInt.empty());
    }

    /**
     * Checks that the add-hostname-to-file-metadata flag can be set to either value
     * and is read back unchanged.
     */
    @Test
    public void testAddHostnameToFileMetadata()
    {
        OrcFileWriterConfig config = new OrcFileWriterConfig();
        config.setAddHostnameToFileMetadataEnabled(false);
        assertFalse(config.isAddHostnameToFileMetadataEnabled());

        config.setAddHostnameToFileMetadataEnabled(true);
        assertTrue(config.isAddHostnameToFileMetadataEnabled());
    }
}