TestDwrfMetadataWriter.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.metadata;

import com.facebook.presto.orc.proto.DwrfProto;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedMap;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.List;
import java.util.Optional;

import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT;
import static com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DWRF_MAP_FLAT;
import static com.facebook.presto.orc.metadata.DwrfMetadataWriter.toColumnEncoding;
import static com.facebook.presto.orc.metadata.DwrfMetadataWriter.toColumnEncodings;
import static com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStream;
import static com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStreamKind;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_COUNT;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.IN_MAP;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_INDEX;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.SECONDARY;
import static com.facebook.presto.orc.protobuf.ByteString.copyFromUtf8;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.expectThrows;

public class TestDwrfMetadataWriter
{
    private static final int COLUMN_ID = 3;

    @Test
    public void testToColumnEncodingDirect()
    {
        int expectedDictionarySize = 0;
        ColumnEncoding columnEncoding = new ColumnEncoding(DIRECT, expectedDictionarySize);

        DwrfProto.ColumnEncoding actual = toColumnEncoding(COLUMN_ID, columnEncoding);

        assertEquals(actual.getColumn(), COLUMN_ID);
        assertEquals(actual.getKind(), DwrfProto.ColumnEncoding.Kind.DIRECT);
        assertEquals(actual.getDictionarySize(), expectedDictionarySize);
        assertEquals(actual.getSequence(), 0);
    }

    @Test
    public void testToColumnEncodingDictionary()
    {
        int expectedDictionarySize = 5;
        ColumnEncoding columnEncoding = new ColumnEncoding(DICTIONARY, expectedDictionarySize);

        DwrfProto.ColumnEncoding actual = toColumnEncoding(COLUMN_ID, columnEncoding);

        assertEquals(actual.getColumn(), COLUMN_ID);
        assertEquals(actual.getKind(), DwrfProto.ColumnEncoding.Kind.DICTIONARY);
        assertEquals(actual.getDictionarySize(), expectedDictionarySize);
        assertEquals(actual.getSequence(), 0);
    }

    @Test
    public void testToColumnEncodingFlatMap()
    {
        int expectedDictionarySize = 0;
        ColumnEncoding columnEncoding = new ColumnEncoding(DWRF_MAP_FLAT, expectedDictionarySize);

        DwrfProto.ColumnEncoding actual = toColumnEncoding(COLUMN_ID, columnEncoding);

        assertEquals(actual.getColumn(), COLUMN_ID);
        assertEquals(actual.getKind(), DwrfProto.ColumnEncoding.Kind.MAP_FLAT);
        assertEquals(actual.getDictionarySize(), expectedDictionarySize);
        assertEquals(actual.getSequence(), 0);
    }

    @DataProvider
    public static Object[][] sequenceKeyProvider()
    {
        return new Object[][] {
                {DwrfProto.KeyInfo.newBuilder().setIntKey(1).build(), DwrfProto.KeyInfo.newBuilder().setIntKey(5).build()},
                {DwrfProto.KeyInfo.newBuilder().setBytesKey(copyFromUtf8("key1")).build(), DwrfProto.KeyInfo.newBuilder().setBytesKey(copyFromUtf8("key2")).build()}
        };
    }

    @Test(dataProvider = "sequenceKeyProvider")
    public void testToColumnEncodingsWithSequence(DwrfProto.KeyInfo key1, DwrfProto.KeyInfo key2)
    {
        int expectedDictionarySize1 = 5;
        int expectedSequenceId1 = 0;
        ColumnEncoding valueEncoding1 = new ColumnEncoding(DIRECT, expectedDictionarySize1);
        DwrfSequenceEncoding sequenceEncoding1 = new DwrfSequenceEncoding(key1, valueEncoding1);

        int expectedDictionarySize2 = 10;
        int expectedSequenceId2 = 5;
        ColumnEncoding valueEncoding2 = new ColumnEncoding(DICTIONARY, expectedDictionarySize2);
        DwrfSequenceEncoding sequenceEncoding2 = new DwrfSequenceEncoding(key2, valueEncoding2);

        ImmutableSortedMap<Integer, DwrfSequenceEncoding> additionalSequenceEncodings = ImmutableSortedMap.of(
                expectedSequenceId1, sequenceEncoding1,
                expectedSequenceId2, sequenceEncoding2);
        ColumnEncoding columnEncoding = new ColumnEncoding(DIRECT, 0, Optional.of(additionalSequenceEncodings));

        List<DwrfProto.ColumnEncoding> actual = toColumnEncodings(ImmutableMap.of(COLUMN_ID, columnEncoding));
        assertEquals(actual.size(), 2);

        DwrfProto.ColumnEncoding actualValueEncoding1 = actual.get(0);
        assertEquals(actualValueEncoding1.getColumn(), COLUMN_ID);
        assertEquals(actualValueEncoding1.getKind(), DwrfProto.ColumnEncoding.Kind.DIRECT);
        assertEquals(actualValueEncoding1.getDictionarySize(), expectedDictionarySize1);

        assertEquals(actualValueEncoding1.getSequence(), expectedSequenceId1);
        assertEquals(actualValueEncoding1.getKey(), key1);

        DwrfProto.ColumnEncoding actualValueEncoding2 = actual.get(1);
        assertEquals(actualValueEncoding2.getColumn(), COLUMN_ID);
        assertEquals(actualValueEncoding2.getKind(), DwrfProto.ColumnEncoding.Kind.DICTIONARY);
        assertEquals(actualValueEncoding2.getDictionarySize(), expectedDictionarySize2);
        assertEquals(actualValueEncoding2.getSequence(), expectedSequenceId2);
        assertEquals(actualValueEncoding2.getKey(), key2);
    }

    @Test
    public void testToColumnEncodingsWithInvalidDeeplyNestedAdditionalSequence()
    {
        DwrfProto.KeyInfo key1 = DwrfProto.KeyInfo.newBuilder().setIntKey(1).build();
        DwrfProto.KeyInfo key2 = DwrfProto.KeyInfo.newBuilder().setIntKey(2).build();

        // level 2
        ColumnEncoding deeplyNestedValueEncoding = new ColumnEncoding(DIRECT, 0);
        DwrfSequenceEncoding deeplyNestedSequenceEncoding = new DwrfSequenceEncoding(key1, deeplyNestedValueEncoding);
        ImmutableSortedMap<Integer, DwrfSequenceEncoding> deeplyNestedSequenceEncodings = ImmutableSortedMap.of(0, deeplyNestedSequenceEncoding);

        // level 1
        ColumnEncoding nestedColumnEncoding = new ColumnEncoding(DIRECT, 0, Optional.of(deeplyNestedSequenceEncodings));
        DwrfSequenceEncoding nestedSequenceEncoding = new DwrfSequenceEncoding(key2, nestedColumnEncoding);
        ImmutableSortedMap<Integer, DwrfSequenceEncoding> nestedSequenceEncodings = ImmutableSortedMap.of(0, nestedSequenceEncoding);

        // root
        ColumnEncoding columnEncoding = new ColumnEncoding(DIRECT, 0, Optional.of(nestedSequenceEncodings));

        expectThrows(IllegalArgumentException.class, () -> toColumnEncodings(ImmutableMap.of(COLUMN_ID, columnEncoding)));
    }

    @Test
    public void testToStreamKind()
    {
        assertEquals(toStreamKind(PRESENT), DwrfProto.Stream.Kind.PRESENT);
        assertEquals(toStreamKind(IN_MAP), DwrfProto.Stream.Kind.IN_MAP);
        assertEquals(toStreamKind(DATA), DwrfProto.Stream.Kind.DATA);
        assertEquals(toStreamKind(SECONDARY), DwrfProto.Stream.Kind.NANO_DATA);
        assertEquals(toStreamKind(LENGTH), DwrfProto.Stream.Kind.LENGTH);
        assertEquals(toStreamKind(DICTIONARY_DATA), DwrfProto.Stream.Kind.DICTIONARY_DATA);
        assertEquals(toStreamKind(DICTIONARY_COUNT), DwrfProto.Stream.Kind.DICTIONARY_COUNT);
        assertEquals(toStreamKind(ROW_INDEX), DwrfProto.Stream.Kind.ROW_INDEX);
    }

    @Test
    public void testToStream()
    {
        int expectedSequence = 10;
        int expectedLength = 15;
        long expectedOffset = 25;
        boolean expectedUseVints = true;

        Stream stream = new Stream(COLUMN_ID, expectedSequence, DATA, expectedLength, expectedUseVints);
        DwrfProto.Stream actual = toStream(stream);
        assertEquals(actual.getColumn(), COLUMN_ID);
        assertEquals(actual.getSequence(), expectedSequence);
        assertEquals(actual.getKind(), DwrfProto.Stream.Kind.DATA);
        assertEquals(actual.getLength(), expectedLength);
        assertTrue(actual.getUseVInts());
        assertFalse(actual.hasOffset());

        stream = new Stream(COLUMN_ID, DATA, expectedLength, expectedUseVints, expectedSequence, Optional.of(expectedOffset));
        actual = toStream(stream);
        assertEquals(actual.getColumn(), COLUMN_ID);
        assertEquals(actual.getSequence(), expectedSequence);
        assertEquals(actual.getKind(), DwrfProto.Stream.Kind.DATA);
        assertEquals(actual.getLength(), expectedLength);
        assertTrue(actual.getUseVInts());
        assertEquals(actual.getOffset(), expectedOffset);
    }
}