TestMapColumnStatisticsBuilder.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;
import com.facebook.presto.orc.proto.DwrfProto.KeyInfo;
import com.facebook.presto.orc.protobuf.ByteString;
import com.google.common.collect.ImmutableList;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
/**
 * Tests for {@link MapColumnStatisticsBuilder}: building map column statistics out of
 * per-key {@link ColumnStatistics}, and merging the map statistics of several column
 * statistics objects.
 */
public class TestMapColumnStatisticsBuilder
{
    private static final KeyInfo INT_KEY1 = intKey(1);
    private static final KeyInfo INT_KEY2 = intKey(2);
    private static final KeyInfo INT_KEY3 = intKey(3);
    private static final KeyInfo STRING_KEY1 = bytesKey("s1");
    private static final KeyInfo STRING_KEY2 = bytesKey("s2");
    private static final KeyInfo STRING_KEY3 = bytesKey("s3");

    /**
     * Supplies the key sets used by the parameterized tests: one row holding the three
     * int keys and one row holding the three bytes keys.
     * <p>
     * NOTE(review): every row has three elements while the consuming test methods declare
     * a single {@code KeyInfo[]} parameter; this presumably relies on how TestNG binds a
     * trailing array parameter - confirm against the TestNG version in use.
     */
    @DataProvider
    public Object[][] keySupplier()
    {
        Object[] intKeys = {INT_KEY1, INT_KEY2, INT_KEY3};
        Object[] stringKeys = {STRING_KEY1, STRING_KEY2, STRING_KEY3};
        return new Object[][] {intKeys, stringKeys};
    }

    /**
     * A builder that received nothing must produce a plain {@link ColumnStatistics} with
     * zero values and no map statistics.
     */
    @Test
    public void testAddEmptyMapStatistics()
    {
        ColumnStatistics emptyStatistics = new MapColumnStatisticsBuilder(true).buildColumnStatistics();

        assertEquals(emptyStatistics.getClass(), ColumnStatistics.class);
        assertEquals(emptyStatistics.getNumberOfValues(), 0);
        assertNull(emptyStatistics.getMapStatistics());
    }

    /**
     * Per-key statistics added to the builder must come back as entries, in the order
     * they were added, alongside the recorded value count.
     */
    @Test(dataProvider = "keySupplier")
    public void testAddMapStatistics(KeyInfo[] keys)
    {
        assertPerKeyStatisticsRetained(keys, 10);
    }

    /**
     * Covers the case where keys are carried over from one row group to another row
     * group in which all entries are null: the value count stays at zero, yet the
     * per-key entries must still be present.
     */
    @Test(dataProvider = "keySupplier")
    public void testAddMapStatisticsNoValues(KeyInfo[] keys)
    {
        assertPerKeyStatisticsRetained(keys, 0);
    }

    /**
     * Merges two map statistics with keys [k0,k1] and [k1,k2] through
     * {@link MapColumnStatisticsBuilder#mergeMapStatistics}; the column statistics for
     * k1 should be merged together.
     */
    @Test(dataProvider = "keySupplier")
    public void testMergeMapStatistics(KeyInfo[] keys)
    {
        List<ColumnStatistics> inputs = overlappingMapColumnStatistics(keys, 8, 16);

        MapStatistics merged = MapColumnStatisticsBuilder.mergeMapStatistics(inputs, null).get();

        assertMergedMapStatistics(keys, merged);
    }

    /**
     * Merging a valid map stat with a directly constructed {@link ColumnStatistics}
     * (the invalid map stat) must yield no merged map statistics.
     */
    @Test(dataProvider = "keySupplier")
    public void testMergeMapStatisticsMissingStats(KeyInfo[] keys)
    {
        // valid map stat
        MapColumnStatisticsBuilder validBuilder = new MapColumnStatisticsBuilder(true);
        validBuilder.addMapStatistics(keys[0], new ColumnStatistics(3L, null, null, null));
        validBuilder.increaseValueCount(3);
        ColumnStatistics validMapStat = validBuilder.buildColumnStatistics();

        // invalid map stat
        ColumnStatistics invalidMapStat = new ColumnStatistics(7L, null, null, null);

        Optional<MapStatistics> merged = MapColumnStatisticsBuilder.mergeMapStatistics(
                ImmutableList.of(validMapStat, invalidMapStat),
                null);
        assertFalse(merged.isPresent());
    }

    /**
     * Same scenario as {@link #testMergeMapStatistics}, but driven through
     * {@link ColumnStatistics#mergeColumnStatistics}: keys [k0,k1] and [k1,k2] are merged,
     * the value counts add up, and the column statistics for k1 are merged together.
     */
    @Test(dataProvider = "keySupplier")
    public void testMergeColumnStatistics(KeyInfo[] keys)
    {
        List<ColumnStatistics> inputs = overlappingMapColumnStatistics(keys, 10, 20);

        ColumnStatistics merged = ColumnStatistics.mergeColumnStatistics(inputs);

        assertEquals(merged.getNumberOfValues(), 30);
        assertMergedMapStatistics(keys, merged.getMapStatistics());
    }

    /**
     * Verifies the outcome of merging the statistics produced by
     * {@link #overlappingMapColumnStatistics}: three entries, with k0 and k2 unchanged
     * and k1 holding the merged statistics.
     *
     * @param keys the three keys k0, k1, k2 used to build the merged inputs
     * @param mergedMapStatistics the merged map statistics under test; must not be null
     */
    private void assertMergedMapStatistics(KeyInfo[] keys, MapStatistics mergedMapStatistics)
    {
        assertNotNull(mergedMapStatistics);
        List<MapStatisticsEntry> mergedEntries = mergedMapStatistics.getEntries();
        assertEquals(mergedEntries.size(), 3);

        // index the entries by key so the checks below do not depend on entry order
        Map<KeyInfo, ColumnStatistics> statisticsByKey = new HashMap<>();
        mergedEntries.forEach(entry -> statisticsByKey.put(entry.getKey(), entry.getColumnStatistics()));

        assertEquals(statisticsByKey.get(keys[0]), integerColumnStatistics(3L, 1L, 2L, 3L));
        assertEquals(statisticsByKey.get(keys[1]), integerColumnStatistics(12L, 10L, 95L, 130L)); // merged stats
        assertEquals(statisticsByKey.get(keys[2]), integerColumnStatistics(9L, 12L, 22L, 32L));
    }

    /**
     * Adds statistics for the first two keys to a fresh builder, records
     * {@code valueCount} values, and checks that the built statistics report that count
     * and expose both entries in insertion order.
     */
    private static void assertPerKeyStatisticsRetained(KeyInfo[] keys, int valueCount)
    {
        KeyInfo firstKey = keys[0];
        KeyInfo secondKey = keys[1];
        ColumnStatistics firstKeyStatistics = new ColumnStatistics(3L, null, null, null);
        ColumnStatistics secondKeyStatistics = new ColumnStatistics(5L, null, null, null);

        MapColumnStatisticsBuilder builder = new MapColumnStatisticsBuilder(true);
        builder.addMapStatistics(firstKey, firstKeyStatistics);
        builder.addMapStatistics(secondKey, secondKeyStatistics);
        builder.increaseValueCount(valueCount);

        MapColumnStatistics built = (MapColumnStatistics) builder.buildColumnStatistics();
        assertEquals(built.getNumberOfValues(), valueCount);

        List<MapStatisticsEntry> entries = built.getMapStatistics().getEntries();
        assertEquals(entries.size(), 2);
        assertEntry(entries.get(0), firstKey, firstKeyStatistics);
        assertEntry(entries.get(1), secondKey, secondKeyStatistics);
    }

    /**
     * Checks the key first, then the column statistics, of a single map statistics entry.
     */
    private static void assertEntry(MapStatisticsEntry entry, KeyInfo expectedKey, ColumnStatistics expectedStatistics)
    {
        assertEquals(entry.getKey(), expectedKey);
        assertEquals(entry.getColumnStatistics(), expectedStatistics);
    }

    /**
     * Builds the two inputs of the merge tests: the first carries statistics for
     * [k0,k1], the second for [k1,k2], so that k1 overlaps.
     *
     * @param keys the three keys k0, k1, k2
     * @param firstValueCount value count recorded on the first builder
     * @param secondValueCount value count recorded on the second builder
     * @return the two built column statistics, first builder's result first
     */
    private static List<ColumnStatistics> overlappingMapColumnStatistics(KeyInfo[] keys, int firstValueCount, int secondValueCount)
    {
        MapColumnStatisticsBuilder firstBuilder = new MapColumnStatisticsBuilder(true);
        firstBuilder.addMapStatistics(keys[0], integerColumnStatistics(3L, 1L, 2L, 3L));
        firstBuilder.addMapStatistics(keys[1], integerColumnStatistics(5L, 10L, 20L, 30L));
        firstBuilder.increaseValueCount(firstValueCount);
        ColumnStatistics first = firstBuilder.buildColumnStatistics();

        MapColumnStatisticsBuilder secondBuilder = new MapColumnStatisticsBuilder(true);
        secondBuilder.addMapStatistics(keys[1], integerColumnStatistics(7L, 25L, 95L, 100L));
        secondBuilder.addMapStatistics(keys[2], integerColumnStatistics(9L, 12L, 22L, 32L));
        secondBuilder.increaseValueCount(secondValueCount);
        ColumnStatistics second = secondBuilder.buildColumnStatistics();

        return ImmutableList.of(first, second);
    }

    /**
     * Creates integer column statistics from a value count and the three arguments passed,
     * in this order, to the {@link IntegerStatistics} constructor (the merge tests use them
     * as minimum, maximum and sum); the remaining constructor arguments are left null.
     */
    private static IntegerColumnStatistics integerColumnStatistics(long numberOfValues, long minimum, long maximum, long sum)
    {
        return new IntegerColumnStatistics(numberOfValues, null, null, null, new IntegerStatistics(minimum, maximum, sum));
    }

    /**
     * Creates a {@link KeyInfo} whose int key is set to {@code value}.
     */
    private static KeyInfo intKey(int value)
    {
        return KeyInfo.newBuilder().setIntKey(value).build();
    }

    /**
     * Creates a {@link KeyInfo} whose bytes key is the UTF-8 encoding of {@code value}.
     */
    private static KeyInfo bytesKey(String value)
    {
        return KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8(value)).build();
    }
}