TestBinaryStatisticsBuilder.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;
import com.facebook.presto.common.block.VariableWidthBlockBuilder;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.orc.metadata.statistics.AbstractStatisticsBuilderTest.StatisticsType.NONE;
import static com.facebook.presto.orc.metadata.statistics.BinaryStatistics.BINARY_VALUE_BYTES_OVERHEAD;
import static com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics;
import static io.airlift.slice.Slices.EMPTY_SLICE;
import static io.airlift.slice.Slices.utf8Slice;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertNull;
/**
 * Tests for {@link BinaryStatisticsBuilder}: value counts and byte-length sums of the
 * {@link ColumnStatistics} it builds, merging of several statistics, total value bytes,
 * and equals/hashCode of the built statistics.
 */
public class TestBinaryStatisticsBuilder
        extends AbstractStatisticsBuilderTest<BinaryStatisticsBuilder, Slice>
{
    private static final Slice FIRST_VALUE = utf8Slice("apple");
    private static final Slice SECOND_VALUE = utf8Slice("banana");

    // Text whose bytes are written one per block entry by the block-based tests.
    private static final String ALPHABET = "abcdefghijklmnopqrstuvwxyz";

    public TestBinaryStatisticsBuilder()
    {
        super(NONE, BinaryStatisticsBuilder::new, TestBinaryStatisticsBuilder::addValue);
    }

    /**
     * Runs the shared min/max checks for an empty pair and for both orderings of the two sample values.
     */
    @Test
    public void testMinMaxValues()
    {
        // binary statistics carry no min or max, so the order of the arguments is irrelevant
        assertMinMaxValues(EMPTY_SLICE, EMPTY_SLICE);
        assertMinMaxValues(FIRST_VALUE, SECOND_VALUE);
        assertMinMaxValues(SECOND_VALUE, FIRST_VALUE);
    }

    /**
     * Adds three values (one of them empty) and expects a count of three and a sum equal to their combined length.
     */
    @Test
    public void testSum()
    {
        BinaryStatisticsBuilder builder = new BinaryStatisticsBuilder();
        addValue(builder, EMPTY_SLICE);
        addValue(builder, FIRST_VALUE);
        addValue(builder, SECOND_VALUE);

        int expectedSum = EMPTY_SLICE.length() + FIRST_VALUE.length() + SECOND_VALUE.length();
        assertBinaryStatistics(builder.buildColumnStatistics(), 3, expectedSum);
    }

    /**
     * Feeds a whole block (one single-byte entry per letter plus a trailing null) through
     * {@code addBlock} and expects the sum to equal the number of letter bytes.
     */
    @Test
    public void testBlockBinaryStatistics()
    {
        VariableWidthBlockBuilder letterBlock = singleByteEntriesThenNull(ALPHABET);

        BinaryStatisticsBuilder builder = new BinaryStatisticsBuilder();
        builder.addBlock(VARBINARY, letterBlock);

        BinaryStatistics statistics = builder.buildColumnStatistics().getBinaryStatistics();
        assertEquals(statistics.getSum(), utf8Slice(ALPHABET).length());
    }

    /**
     * Feeds the same kind of block position by position through {@code addValue} and expects
     * the value count to exclude the trailing null and the sum to equal the number of letter bytes.
     */
    @Test
    public void testAddValueByPosition()
    {
        VariableWidthBlockBuilder letterBlock = singleByteEntriesThenNull(ALPHABET);

        BinaryStatisticsBuilder builder = new BinaryStatisticsBuilder();
        int totalPositions = letterBlock.getPositionCount();
        for (int index = 0; index < totalPositions; index++) {
            builder.addValue(VARBINARY, letterBlock, index);
        }

        ColumnStatistics builtStatistics = builder.buildColumnStatistics();
        // one position is the null appended at the end of the block
        assertEquals(builtStatistics.getNumberOfValues(), totalPositions - 1);

        BinaryStatistics statistics = builtStatistics.getBinaryStatistics();
        assertEquals(statistics.getSum(), utf8Slice(ALPHABET).length());
    }

    /**
     * Collects successive snapshots of one builder and checks the merged count and sum after each snapshot.
     */
    @Test
    public void testMerge()
    {
        List<ColumnStatistics> snapshots = new ArrayList<>();
        BinaryStatisticsBuilder builder = new BinaryStatisticsBuilder();

        // snapshot of a builder that has not received any value
        snapshots.add(builder.buildColumnStatistics());
        assertMergedBinaryStatistics(snapshots, 0, 0);

        // The same builder keeps receiving values, so the expected totals below (1, 3, 6)
        // are the running sums of the successive snapshots' value counts.
        addValue(builder, EMPTY_SLICE);
        snapshots.add(builder.buildColumnStatistics());
        assertMergedBinaryStatistics(snapshots, 1, 0);

        addValue(builder, FIRST_VALUE);
        snapshots.add(builder.buildColumnStatistics());
        assertMergedBinaryStatistics(snapshots, 3, FIRST_VALUE.length());

        addValue(builder, SECOND_VALUE);
        snapshots.add(builder.buildColumnStatistics());
        assertMergedBinaryStatistics(snapshots, 6, FIRST_VALUE.length() * 2L + SECOND_VALUE.length());
    }

    /**
     * Checks total value bytes for no values, an empty value, one value and two values;
     * every value is expected to contribute its length plus {@code BINARY_VALUE_BYTES_OVERHEAD}.
     */
    @Test
    public void testTotalValueBytes()
    {
        assertTotalValueBytes(0L, ImmutableList.of());
        assertTotalValueBytes(
                BINARY_VALUE_BYTES_OVERHEAD,
                ImmutableList.of(EMPTY_SLICE));
        assertTotalValueBytes(
                FIRST_VALUE.length() + BINARY_VALUE_BYTES_OVERHEAD,
                ImmutableList.of(FIRST_VALUE));
        assertTotalValueBytes(
                (FIRST_VALUE.length() + SECOND_VALUE.length()) + 2 * BINARY_VALUE_BYTES_OVERHEAD,
                ImmutableList.of(FIRST_VALUE, SECOND_VALUE));
    }

    /**
     * Two builds from an unchanged builder must be equal with equal hash codes; a build made
     * after adding another value must differ from the earlier one in both respects.
     */
    @Test
    public void testEqualsAndHashCode()
    {
        BinaryStatisticsBuilder builder = new BinaryStatisticsBuilder();
        addValue(builder, FIRST_VALUE);

        ColumnStatistics firstBuild = builder.buildColumnStatistics();
        ColumnStatistics repeatedBuild = builder.buildColumnStatistics();
        assertEquals(firstBuild, repeatedBuild);
        assertEquals(firstBuild.hashCode(), repeatedBuild.hashCode());

        addValue(builder, SECOND_VALUE);
        ColumnStatistics laterBuild = builder.buildColumnStatistics();
        assertNotEquals(firstBuild, laterBuild);
        assertNotEquals(firstBuild.hashCode(), laterBuild.hashCode());
    }

    /**
     * Merges {@code statisticsList} and checks the result, then repeats the merge with an empty
     * column statistics entry of 10 values inserted at the front, at the end and in the middle;
     * each of those merges is checked with {@code assertNoColumnStatistics} against the count plus 10.
     */
    private void assertMergedBinaryStatistics(List<ColumnStatistics> statisticsList, int expectedNumberOfValues, long expectedSum)
    {
        assertBinaryStatistics(mergeColumnStatistics(statisticsList), expectedNumberOfValues, expectedSum);

        int expectedWithEmptyEntry = expectedNumberOfValues + 10;

        List<ColumnStatistics> emptyAtFront = insertEmptyColumnStatisticsAt(statisticsList, 0, 10);
        assertNoColumnStatistics(mergeColumnStatistics(emptyAtFront), expectedWithEmptyEntry);

        List<ColumnStatistics> emptyAtEnd = insertEmptyColumnStatisticsAt(statisticsList, statisticsList.size(), 10);
        assertNoColumnStatistics(mergeColumnStatistics(emptyAtEnd), expectedWithEmptyEntry);

        List<ColumnStatistics> emptyInMiddle = insertEmptyColumnStatisticsAt(statisticsList, statisticsList.size() / 2, 10);
        assertNoColumnStatistics(mergeColumnStatistics(emptyInMiddle), expectedWithEmptyEntry);
    }

    /**
     * Checks count and sum when values are expected; otherwise expects no binary statistics and a zero count.
     */
    private void assertBinaryStatistics(ColumnStatistics columnStatistics, int expectedNumberOfValues, long expectedSum)
    {
        if (expectedNumberOfValues <= 0) {
            assertNull(columnStatistics.getBinaryStatistics());
            assertEquals(columnStatistics.getNumberOfValues(), 0);
            return;
        }

        assertEquals(columnStatistics.getNumberOfValues(), expectedNumberOfValues);
        assertEquals(columnStatistics.getBinaryStatistics().getSum(), expectedSum);
    }

    /**
     * Wraps {@code slice} in a one-entry block and hands that block to the builder via {@code addBlock}.
     */
    public static void addValue(BinaryStatisticsBuilder binaryStatisticsBuilder, Slice slice)
    {
        int valueLength = slice.length();
        VariableWidthBlockBuilder singleEntryBlock = new VariableWidthBlockBuilder(null, 1, valueLength);
        singleEntryBlock.writeBytes(slice, 0, valueLength).closeEntry();
        binaryStatisticsBuilder.addBlock(VARBINARY, singleEntryBlock);
    }

    /**
     * Builds a block holding one single-byte entry per byte of {@code text}, followed by one null entry.
     */
    private static VariableWidthBlockBuilder singleByteEntriesThenNull(String text)
    {
        VariableWidthBlockBuilder block = new VariableWidthBlockBuilder(null, text.length(), text.length());
        Slice textBytes = utf8Slice(text);
        for (int offset = 0; offset < textBytes.length(); offset++) {
            VARBINARY.writeSlice(block, textBytes, offset, 1);
        }
        block.appendNull();
        return block;
    }
}