TestTpchMetadata.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.tpch;
import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.ConnectorTableLayoutResult;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.DoubleRange;
import com.facebook.presto.spi.statistics.Estimate;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.facebook.presto.tpch.util.PredicateUtils;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.tpch.PartColumn;
import io.airlift.tpch.TpchColumn;
import io.airlift.tpch.TpchTable;
import org.testng.annotations.Test;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Stream;
import static com.facebook.presto.spi.Constraint.alwaysFalse;
import static com.facebook.presto.spi.Constraint.alwaysTrue;
import static com.facebook.presto.tpch.TpchMetadata.getPrestoType;
import static com.facebook.presto.tpch.util.PredicateUtils.filterOutColumnFromPredicate;
import static com.google.common.collect.Iterables.getOnlyElement;
import static io.airlift.slice.Slices.utf8Slice;
import static io.airlift.tpch.CustomerColumn.MARKET_SEGMENT;
import static io.airlift.tpch.CustomerColumn.NAME;
import static io.airlift.tpch.LineItemColumn.LINE_NUMBER;
import static io.airlift.tpch.NationColumn.NATION_KEY;
import static io.airlift.tpch.OrderColumn.CLERK;
import static io.airlift.tpch.OrderColumn.ORDER_DATE;
import static io.airlift.tpch.OrderColumn.ORDER_KEY;
import static io.airlift.tpch.OrderColumn.ORDER_STATUS;
import static io.airlift.tpch.PartColumn.PART_KEY;
import static io.airlift.tpch.PartColumn.RETAIL_PRICE;
import static io.airlift.tpch.TpchTable.CUSTOMER;
import static io.airlift.tpch.TpchTable.LINE_ITEM;
import static io.airlift.tpch.TpchTable.NATION;
import static io.airlift.tpch.TpchTable.ORDERS;
import static io.airlift.tpch.TpchTable.PART;
import static io.airlift.tpch.TpchTable.PART_SUPPLIER;
import static io.airlift.tpch.TpchTable.REGION;
import static io.airlift.tpch.TpchTable.SUPPLIER;
import static java.lang.String.format;
import static java.util.Arrays.stream;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
public class TestTpchMetadata
{
// Tolerance for comparing statistics: multiplied by the expected row count in testTableStats
// and handed to EstimateAssertion for the column-statistics comparisons.
private static final double TOLERANCE = 0.01;
// Schema names iterated by the statistics tests that expect statistics to be available.
private static final List<String> SUPPORTED_SCHEMAS = ImmutableList.of("tiny", "sf1");
// Metadata instance under test, constructed with the argument "tpch".
private final TpchMetadata tpchMetadata = new TpchMetadata("tpch");
// No session is created; null is passed as the session to every TpchMetadata call in this class.
private final ConnectorSession session = null;
/**
 * Checks the unconstrained row-count estimate of every TPC-H table for each supported schema.
 */
@Test
public void testTableStats()
{
    for (String schemaName : SUPPORTED_SCHEMAS) {
        double scale = TpchMetadata.schemaNameToScaleFactor(schemaName);

        // expected counts that are not multiplied by the scale factor
        testTableStats(schemaName, REGION, 5);
        testTableStats(schemaName, NATION, 25);

        // expected counts that are proportional to the scale factor
        testTableStats(schemaName, SUPPLIER, 10_000 * scale);
        testTableStats(schemaName, CUSTOMER, 150_000 * scale);
        testTableStats(schemaName, PART, 200_000 * scale);
        testTableStats(schemaName, PART_SUPPLIER, 800_000 * scale);
        testTableStats(schemaName, ORDERS, 1_500_000 * scale);
        testTableStats(schemaName, LINE_ITEM, 6_000_000 * scale);
    }
}
/**
 * Checks that the row count is reported as unknown for every table of the "sf10" schema.
 */
@Test
public void testNoTableStats()
{
    String schemaWithoutStats = "sf10";
    testNoTableStats(schemaWithoutStats, REGION);
    testNoTableStats(schemaWithoutStats, NATION);
    testNoTableStats(schemaWithoutStats, SUPPLIER);
    testNoTableStats(schemaWithoutStats, CUSTOMER);
    testNoTableStats(schemaWithoutStats, PART);
    testNoTableStats(schemaWithoutStats, PART_SUPPLIER);
    testNoTableStats(schemaWithoutStats, ORDERS);
    testNoTableStats(schemaWithoutStats, LINE_ITEM);
}
/**
 * Checks ORDERS row-count estimates under various ORDER_STATUS constraints for each supported schema.
 */
@Test
public void testTableStatsWithConstraints()
{
    for (String schemaName : SUPPORTED_SCHEMAS) {
        double scale = TpchMetadata.schemaNameToScaleFactor(schemaName);

        // constraints that are expected to match no rows
        testTableStats(schemaName, ORDERS, alwaysFalse(), 0);
        testTableStats(schemaName, ORDERS, constraint(ORDER_STATUS, "NO SUCH STATUS"), 0);

        // single status values
        testTableStats(schemaName, ORDERS, constraint(ORDER_STATUS, "F"), 730_400 * scale);
        testTableStats(schemaName, ORDERS, constraint(ORDER_STATUS, "O"), 733_300 * scale);

        // unions of status values
        testTableStats(schemaName, ORDERS, constraint(ORDER_STATUS, "F", "NO SUCH STATUS"), 730_400 * scale);
        testTableStats(schemaName, ORDERS, constraint(ORDER_STATUS, "F", "O", "P"), 1_500_000 * scale);
    }
}
/**
 * Asserts the row-count estimate of {@code table} in {@code schema} under the always-true constraint.
 */
private void testTableStats(String schema, TpchTable<?> table, double expectedRowCount)
{
    Constraint<ColumnHandle> unconstrained = alwaysTrue();
    testTableStats(schema, table, unconstrained, expectedRowCount);
}
/**
 * Fetches table statistics for {@code table} under {@code constraint} and asserts that the
 * row count is within {@code expectedRowCount * TOLERANCE} of {@code expectedRowCount}.
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount)
{
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    List<ColumnHandle> allColumns = ImmutableList.copyOf(tpchMetadata.getColumnHandles(session, handle).values());
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, Optional.empty(), allColumns, constraint);

    Estimate rowCount = statistics.getRowCount();
    double actualRowCount = rowCount.getValue();
    // the reported estimate must equal an estimate rebuilt from its own value
    assertEquals(rowCount, Estimate.of(actualRowCount));
    assertEquals(actualRowCount, expectedRowCount, expectedRowCount * TOLERANCE);
}
/**
 * Asserts that the unconstrained row count of {@code table} in {@code schema} is unknown.
 */
private void testNoTableStats(String schema, TpchTable<?> table)
{
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    List<ColumnHandle> allColumns = ImmutableList.copyOf(tpchMetadata.getColumnHandles(session, handle).values());
    Estimate rowCount = tpchMetadata
            .getTableStatistics(session, handle, Optional.empty(), allColumns, alwaysTrue())
            .getRowCount();
    assertTrue(rowCount.isUnknown());
}
/**
 * Checks unconstrained column statistics for a representative set of columns in each
 * supported schema.
 *
 * <p>Iterates {@code SUPPORTED_SCHEMAS} like the other statistics tests, and fails explicitly
 * when a schema has no expected values for the varchar/double columns instead of silently
 * skipping those assertions.
 */
@Test
public void testColumnStats()
{
    for (String schema : SUPPORTED_SCHEMAS) {
        double scaleFactor = TpchMetadata.schemaNameToScaleFactor(schema);

        // id column
        testColumnStats(schema, NATION, NATION_KEY, columnStatistics(25, 0, 24));

        // foreign key to dictionary identifier columns
        testColumnStats(schema, SUPPLIER, NATION_KEY, columnStatistics(25, 0, 24));

        // foreign key to scalable identifier column
        testColumnStats(schema, PART_SUPPLIER, PART_KEY, columnStatistics(200_000 * scaleFactor, 1, 200_000 * scaleFactor));

        // low-valued numeric column
        testColumnStats(schema, LINE_ITEM, LINE_NUMBER, columnStatistics(7, 1, 7));

        // date
        testColumnStats(schema, ORDERS, ORDER_DATE, columnStatistics(2_400, 8_035, 10_440));

        // varchar and double columns: the expected values are listed per schema
        if (schema.equals("tiny")) {
            testColumnStats(schema, CUSTOMER, MARKET_SEGMENT, columnStatistics(5, 13465));
            testColumnStats(schema, CUSTOMER, NAME, columnStatistics(150_000 * scaleFactor, 27000));
            testColumnStats(schema, PART, RETAIL_PRICE, columnStatistics(1_099, 901, 1900.99));
        }
        else if (schema.equals("sf1")) {
            testColumnStats(schema, CUSTOMER, NAME, columnStatistics(150_000 * scaleFactor, 2700000));
            testColumnStats(schema, PART, RETAIL_PRICE, columnStatistics(20899, 901, 2089.99));
            testColumnStats(schema, CUSTOMER, MARKET_SEGMENT, columnStatistics(5, 1349610));
        }
        else {
            // a schema was added to SUPPORTED_SCHEMAS without expected values here
            fail(format("no expected varchar/double column statistics for schema '%s'", schema));
        }
    }
}
/**
 * Checks ORDERS column statistics under always-true, always-false, ORDER_STATUS and CLERK
 * constraints for each supported schema.
 */
@Test
public void testColumnStatsWithConstraints()
{
    EnumSet<ColumnStatisticsFields> distinctOnly = EnumSet.of(ColumnStatisticsFields.DistinctValuesCount);

    for (String schemaName : SUPPORTED_SCHEMAS) {
        double scale = TpchMetadata.schemaNameToScaleFactor(schemaName);
        double orderCount = 1_500_000 * scale;
        double maxOrderKey = 6_000_000 * scale;

        // Single constrained column has only one unique value
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "F"), columnStatistics(1), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "O"), columnStatistics(1), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "P"), columnStatistics(1), distinctOnly);

        // only min and max values for non-scaling columns can be estimated for non-constrained columns
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "F"), rangeStatistics(3, maxOrderKey));
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "O"), rangeStatistics(1, maxOrderKey));
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "P"), rangeStatistics(65, maxOrderKey));

        // nothing can be said for always false constraints
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, alwaysFalse(), noColumnStatistics());
        testColumnStats(schemaName, ORDERS, ORDER_KEY, alwaysFalse(), noColumnStatistics());
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "NO SUCH STATUS"), noColumnStatistics());
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "NO SUCH STATUS"), noColumnStatistics());

        // unmodified stats are returned for the always true constraint
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, alwaysTrue(), columnStatistics(3), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_KEY, alwaysTrue(), columnStatistics(orderCount, 1, maxOrderKey));

        // constraints on columns other than ORDER_STATUS are not supported and are ignored
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(CLERK, "NO SUCH CLERK"), columnStatistics(3), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(CLERK, "Clerk#000000001"), columnStatistics(orderCount, 1, maxOrderKey));

        // compound constraints are supported
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "F", "NO SUCH STATUS"), columnStatistics(1), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "F", "NO SUCH STATUS"), rangeStatistics(3, maxOrderKey));
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "F", "O"), columnStatistics(2), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "F", "O"), rangeStatistics(1, maxOrderKey));
        testColumnStats(schemaName, ORDERS, ORDER_STATUS, constraint(ORDER_STATUS, "F", "O", "P"), columnStatistics(3), distinctOnly);
        testColumnStats(schemaName, ORDERS, ORDER_KEY, constraint(ORDER_STATUS, "F", "O", "P"), columnStatistics(orderCount, 1, maxOrderKey));
    }
}
/**
 * Asserts every statistics field of {@code column} under the always-true constraint.
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, ColumnStatistics expectedStatistics)
{
    Constraint<ColumnHandle> unconstrained = alwaysTrue();
    EnumSet<ColumnStatisticsFields> everyField = EnumSet.allOf(ColumnStatisticsFields.class);
    testColumnStats(schema, table, column, unconstrained, expectedStatistics, everyField);
}
/**
 * Asserts every statistics field of {@code column} under the given {@code constraint}.
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expectedStatistics)
{
    EnumSet<ColumnStatisticsFields> everyField = EnumSet.allOf(ColumnStatisticsFields.class);
    testColumnStats(schema, table, column, constraint, expectedStatistics, everyField);
}
/**
 * Fetches the statistics of {@code column} under {@code constraint} and compares the fields
 * selected by {@code fieldsToAssertOn} against {@code expected}.
 *
 * <p>The column handles are looked up once and reused for both the statistics request and the
 * column lookup. A missing column handle or a missing statistics entry fails the test with a
 * descriptive message instead of a bare {@link NullPointerException}.
 *
 * @param schema name of the TPCH schema to query
 * @param table table owning {@code column}
 * @param column column whose statistics are checked
 * @param constraint constraint passed to {@code getTableStatistics}
 * @param expected expected statistics
 * @param fieldsToAssertOn statistics fields to compare; fields not in the set are ignored
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected,
        EnumSet<ColumnStatisticsFields> fieldsToAssertOn)
{
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName(schema, table.getTableName()));
    Map<String, ColumnHandle> handlesByName = tpchMetadata.getColumnHandles(session, tableHandle);
    List<ColumnHandle> columnHandles = ImmutableList.copyOf(handlesByName.values());
    TableStatistics tableStatistics = tpchMetadata.getTableStatistics(session, tableHandle, Optional.empty(), columnHandles, constraint);

    String columnName = column.getSimplifiedColumnName();
    ColumnHandle columnHandle = handlesByName.get(columnName);
    if (columnHandle == null) {
        fail(format("no column handle for column '%s' of table '%s' in schema '%s'", columnName, table.getTableName(), schema));
    }
    ColumnStatistics actual = tableStatistics.getColumnStatistics().get(columnHandle);
    if (actual == null) {
        fail(format("no statistics returned for column '%s' of table '%s' in schema '%s'", columnName, table.getTableName(), schema));
    }

    EstimateAssertion estimateAssertion = new EstimateAssertion(TOLERANCE);
    if (fieldsToAssertOn.contains(ColumnStatisticsFields.DistinctValuesCount)) {
        estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    }
    if (fieldsToAssertOn.contains(ColumnStatisticsFields.DataSize)) {
        estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    }
    if (fieldsToAssertOn.contains(ColumnStatisticsFields.NullsFraction)) {
        estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    }
    if (fieldsToAssertOn.contains(ColumnStatisticsFields.Range)) {
        estimateAssertion.assertClose(actual.getRange(), expected.getRange(), "range");
    }
}
/**
 * Checks the table layout produced for the sf1 ORDERS table under an ORDER_STATUS constraint
 * and under an ORDER_KEY constraint.
 */
@Test
public void testOrdersOrderStatusPredicatePushdown()
{
    SchemaTableName ordersName = new SchemaTableName("sf1", ORDERS.getTableName());
    TpchTableHandle ordersHandle = tpchMetadata.getTableHandle(session, ordersName);

    // constraint on ORDER_STATUS: nothing is left unenforced and the layout predicate equals the domain
    TupleDomain<ColumnHandle> statusDomain = fixedValueTupleDomain(tpchMetadata, ORDER_STATUS, utf8Slice("P"));
    Constraint<ColumnHandle> statusConstraint = new Constraint<>(statusDomain, convertToPredicate(statusDomain, ORDER_STATUS));
    ConnectorTableLayoutResult statusLayout = getTableOnlyLayout(tpchMetadata, session, ordersHandle, statusConstraint);
    assertTupleDomainEquals(statusLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(statusLayout.getTableLayout().getPredicate(), statusDomain, session);

    // constraint on ORDER_KEY: the whole domain is left unenforced
    TupleDomain<ColumnHandle> keyDomain = fixedValueTupleDomain(tpchMetadata, ORDER_KEY, 42L);
    Constraint<ColumnHandle> keyConstraint = new Constraint<>(keyDomain, convertToPredicate(keyDomain, ORDER_STATUS));
    ConnectorTableLayoutResult keyLayout = getTableOnlyLayout(tpchMetadata, session, ordersHandle, keyConstraint);
    assertTupleDomainEquals(keyLayout.getUnenforcedConstraint(), keyDomain, session);
    assertTupleDomainEquals(
            keyLayout.getTableLayout().getPredicate(),
            // The most important thing about the expected value is that it is NOT TupleDomain.none() (or equivalent).
            // A concrete expected value is used instead of checking TupleDomain::isNone so the test cannot pass on some other wrong value.
            TupleDomain.columnWiseUnion(
                    fixedValueTupleDomain(tpchMetadata, ORDER_STATUS, utf8Slice("F")),
                    fixedValueTupleDomain(tpchMetadata, ORDER_STATUS, utf8Slice("O")),
                    fixedValueTupleDomain(tpchMetadata, ORDER_STATUS, utf8Slice("P"))),
            session);
}
/**
 * Checks the table layout produced for the sf1 PART table under constraints on TYPE,
 * on CONTAINER, and on both columns, using both existing and "UNKNOWN" values.
 */
@Test
public void testPartTypeAndPartContainerPredicatePushdown()
{
    SchemaTableName partName = new SchemaTableName("sf1", PART.getTableName());
    TpchTableHandle partHandle = tpchMetadata.getTableHandle(session, partName);

    // TYPE fixed to "SMALL BRUSHED COPPER"
    TupleDomain<ColumnHandle> typeDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.TYPE, utf8Slice("SMALL BRUSHED COPPER"));
    Constraint<ColumnHandle> typeConstraint = new Constraint<>(typeDomain, convertToPredicate(typeDomain, PartColumn.TYPE));
    ConnectorTableLayoutResult typeLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, typeConstraint);
    assertTupleDomainEquals(typeLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(
            filterOutColumnFromPredicate(typeLayout.getTableLayout().getPredicate(), tpchMetadata.toColumnHandle(PartColumn.CONTAINER)),
            typeDomain,
            session);

    // TYPE fixed to "UNKNOWN": the layout predicate is none
    TupleDomain<ColumnHandle> unknownTypeDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.TYPE, utf8Slice("UNKNOWN"));
    Constraint<ColumnHandle> unknownTypeConstraint = new Constraint<>(unknownTypeDomain, convertToPredicate(unknownTypeDomain, PartColumn.TYPE));
    ConnectorTableLayoutResult unknownTypeLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, unknownTypeConstraint);
    assertTupleDomainEquals(unknownTypeLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(unknownTypeLayout.getTableLayout().getPredicate(), TupleDomain.none(), session);

    // CONTAINER fixed to "SM BAG"
    TupleDomain<ColumnHandle> containerDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.CONTAINER, utf8Slice("SM BAG"));
    Constraint<ColumnHandle> containerConstraint = new Constraint<>(containerDomain, convertToPredicate(containerDomain, PartColumn.CONTAINER));
    ConnectorTableLayoutResult containerLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, containerConstraint);
    assertTupleDomainEquals(containerLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(
            filterOutColumnFromPredicate(containerLayout.getTableLayout().getPredicate(), tpchMetadata.toColumnHandle(PartColumn.TYPE)),
            containerDomain,
            session);

    // CONTAINER fixed to "UNKNOWN": the layout predicate is none
    TupleDomain<ColumnHandle> unknownContainerDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.CONTAINER, utf8Slice("UNKNOWN"));
    Constraint<ColumnHandle> unknownContainerConstraint = new Constraint<>(unknownContainerDomain, convertToPredicate(unknownContainerDomain, PartColumn.CONTAINER));
    ConnectorTableLayoutResult unknownContainerLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, unknownContainerConstraint);
    assertTupleDomainEquals(unknownContainerLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(unknownContainerLayout.getTableLayout().getPredicate(), TupleDomain.none(), session);

    // TYPE and CONTAINER both fixed to existing values; the predicate is built for CONTAINER only
    TupleDomain<ColumnHandle> bothDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.TYPE, utf8Slice("SMALL BRUSHED COPPER"), PartColumn.CONTAINER, utf8Slice("SM BAG"));
    Constraint<ColumnHandle> bothConstraint = new Constraint<>(bothDomain, convertToPredicate(bothDomain, PartColumn.CONTAINER));
    ConnectorTableLayoutResult bothLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, bothConstraint);
    assertTupleDomainEquals(bothLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(bothLayout.getTableLayout().getPredicate(), bothDomain, session);

    // TYPE and CONTAINER both fixed to "UNKNOWN": the layout predicate is none
    TupleDomain<ColumnHandle> bothUnknownDomain = fixedValueTupleDomain(tpchMetadata, PartColumn.TYPE, utf8Slice("UNKNOWN"), PartColumn.CONTAINER, utf8Slice("UNKNOWN"));
    Constraint<ColumnHandle> bothUnknownConstraint = new Constraint<>(bothUnknownDomain, convertToPredicate(bothUnknownDomain, PartColumn.TYPE, PartColumn.CONTAINER));
    ConnectorTableLayoutResult bothUnknownLayout = getTableOnlyLayout(tpchMetadata, session, partHandle, bothUnknownConstraint);
    assertTupleDomainEquals(bothUnknownLayout.getUnenforcedConstraint(), TupleDomain.all(), session);
    assertTupleDomainEquals(bothUnknownLayout.getTableLayout().getPredicate(), TupleDomain.none(), session);
}
/**
 * Builds a predicate over column bindings that checks each of {@code columns} against
 * {@code domain}.
 *
 * <p>The predicate returns {@code false} as soon as a bound value for one of the columns is
 * rejected by {@code PredicateUtils.convertToPredicate(domain, columnHandle)}. Columns that
 * are absent from the bindings are skipped.
 *
 * @param domain tuple domain the bound values are tested against
 * @param columns columns to check; at least one is required
 * @return predicate over a map of column handle to bound value
 * @throws IllegalArgumentException if no columns are given
 * @throws NullPointerException when the predicate is applied to a binding mapped to {@code null}
 */
private Predicate<Map<ColumnHandle, NullableValue>> convertToPredicate(TupleDomain<ColumnHandle> domain, TpchColumn<?>... columns)
{
    Preconditions.checkArgument(columns.length > 0, "No columns given");
    return bindings -> {
        for (TpchColumn<?> column : columns) {
            ColumnHandle columnHandle = tpchMetadata.toColumnHandle(column);
            if (bindings.containsKey(columnHandle)) {
                NullableValue nullableValue = requireNonNull(bindings.get(columnHandle), "binding is null");
                // reuse the handle resolved above rather than converting the column a second time
                if (!PredicateUtils.convertToPredicate(domain, columnHandle).test(nullableValue)) {
                    return false;
                }
            }
        }
        return true;
    };
}
/**
 * Fails the test when {@code actual} and {@code expected} are not equal.
 *
 * <p>The failure message must be buildable even when {@code session} or one of the domains is
 * {@code null}; otherwise a genuine mismatch is reported as a {@link NullPointerException}
 * rather than as an assertion failure.
 *
 * @param actual tuple domain produced by the code under test
 * @param expected tuple domain the test expects
 * @param session session used to render the domains in the failure message; may be {@code null}
 */
private void assertTupleDomainEquals(TupleDomain<?> actual, TupleDomain<?> expected, ConnectorSession session)
{
    if (Objects.equals(actual, expected)) {
        return;
    }
    fail(format("expected [%s] but found [%s]", describeTupleDomain(expected, session), describeTupleDomain(actual, session)));
}

// Renders a tuple domain for a failure message, using the default string form when the
// domain or the session is null.
private static String describeTupleDomain(TupleDomain<?> domain, ConnectorSession session)
{
    if (domain == null || session == null) {
        return String.valueOf(domain);
    }
    return domain.toString(session.getSqlFunctionProperties());
}
/**
 * Builds a constraint whose summary is the column-wise union of one fixed-value domain per
 * entry of {@code values} on {@code column}, paired with the matching predicate.
 */
private Constraint<ColumnHandle> constraint(TpchColumn<?> column, String... values)
{
    ImmutableList.Builder<TupleDomain<ColumnHandle>> perValueDomains = ImmutableList.builder();
    for (String value : values) {
        perValueDomains.add(fixedValueTupleDomain(tpchMetadata, column, utf8Slice(value)));
    }
    List<TupleDomain<ColumnHandle>> domains = perValueDomains.build();
    TupleDomain<ColumnHandle> unionDomain = TupleDomain.columnWiseUnion(domains);
    return new Constraint<>(unionDomain, convertToPredicate(unionDomain, column));
}
/**
 * Creates a tuple domain that fixes {@code column} to {@code value}.
 *
 * @throws NullPointerException if {@code column} or {@code value} is null
 */
private static TupleDomain<ColumnHandle> fixedValueTupleDomain(TpchMetadata tpchMetadata, TpchColumn<?> column, Object value)
{
    requireNonNull(column, "column is null");
    requireNonNull(value, "value is null");
    ColumnHandle handle = tpchMetadata.toColumnHandle(column);
    NullableValue fixedValue = new NullableValue(getPrestoType(column), value);
    return TupleDomain.fromFixedValues(ImmutableMap.of(handle, fixedValue));
}
/**
 * Creates a tuple domain that fixes {@code column1} to {@code value1} and {@code column2}
 * to {@code value2}.
 *
 * <p>Arguments are validated the same way as in the single-column overload so that a null
 * argument is reported with a message naming it.
 *
 * @throws NullPointerException if any column or value is null
 */
private static TupleDomain<ColumnHandle> fixedValueTupleDomain(TpchMetadata tpchMetadata, TpchColumn<?> column1, Object value1, TpchColumn<?> column2, Object value2)
{
    requireNonNull(column1, "column1 is null");
    requireNonNull(value1, "value1 is null");
    requireNonNull(column2, "column2 is null");
    requireNonNull(value2, "value2 is null");
    return TupleDomain.fromFixedValues(
            ImmutableMap.of(
                    tpchMetadata.toColumnHandle(column1), new NullableValue(getPrestoType(column1), value1),
                    tpchMetadata.toColumnHandle(column2), new NullableValue(getPrestoType(column2), value2)));
}
/**
 * Requests the table layouts for {@code tableHandle} under {@code constraint}, with no
 * desired columns, and returns the single result.
 */
private static ConnectorTableLayoutResult getTableOnlyLayout(TpchMetadata tpchMetadata, ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
{
    // getOnlyElement rejects anything other than exactly one layout
    return getOnlyElement(tpchMetadata.getTableLayouts(session, tableHandle, constraint, Optional.empty()));
}
/**
 * Expected statistics with a distinct-values count of zero, a data size of zero and no range.
 */
private ColumnStatistics noColumnStatistics()
{
    Optional<Double> zero = Optional.of(0.0);
    Optional<DoubleRange> noRange = Optional.empty();
    return createColumnStatistics(zero, noRange, zero);
}
/**
 * Expected statistics carrying only a distinct-values count; range and data size are left unset.
 */
private ColumnStatistics columnStatistics(double distinctValuesCount)
{
    Optional<Double> distinct = Optional.of(distinctValuesCount);
    Optional<DoubleRange> noRange = Optional.empty();
    Optional<Double> noDataSize = Optional.empty();
    return createColumnStatistics(distinct, noRange, noDataSize);
}
/**
 * Expected statistics carrying a distinct-values count and a data size; the range is left unset.
 */
private ColumnStatistics columnStatistics(double distinctValuesCount, double dataSize)
{
    Optional<Double> distinct = Optional.of(distinctValuesCount);
    Optional<DoubleRange> noRange = Optional.empty();
    Optional<Double> size = Optional.of(dataSize);
    return createColumnStatistics(distinct, noRange, size);
}
/**
 * Expected statistics carrying a distinct-values count and a {@code [min, max]} range;
 * the data size is left unset.
 */
private ColumnStatistics columnStatistics(double distinctValuesCount, double min, double max)
{
    Optional<Double> distinct = Optional.of(distinctValuesCount);
    Optional<DoubleRange> valueRange = Optional.of(new DoubleRange(min, max));
    Optional<Double> noDataSize = Optional.empty();
    return createColumnStatistics(distinct, valueRange, noDataSize);
}
/**
 * Expected statistics carrying only a {@code [min, max]} range; distinct-values count and
 * data size are left unset.
 */
private ColumnStatistics rangeStatistics(double min, double max)
{
    Optional<Double> noDistinctCount = Optional.empty();
    Optional<DoubleRange> valueRange = Optional.of(new DoubleRange(min, max));
    Optional<Double> noDataSize = Optional.empty();
    return createColumnStatistics(noDistinctCount, valueRange, noDataSize);
}
/**
 * Builds column statistics with a nulls fraction of zero; an absent distinct-values count or
 * data size becomes an unknown estimate, and the optional range is passed through as given.
 */
private static ColumnStatistics createColumnStatistics(Optional<Double> distinctValuesCount, Optional<DoubleRange> range, Optional<Double> dataSize)
{
    Estimate distinctEstimate = toEstimate(distinctValuesCount);
    Estimate dataSizeEstimate = toEstimate(dataSize);
    return ColumnStatistics.builder()
            .setNullsFraction(Estimate.zero())
            .setDistinctValuesCount(distinctEstimate)
            .setRange(range)
            .setDataSize(dataSizeEstimate)
            .build();
}
/**
 * Converts an optional value into an estimate: {@code Estimate.of(value)} when present,
 * {@code Estimate.unknown()} otherwise.
 */
private static Estimate toEstimate(Optional<Double> value)
{
    if (value.isPresent()) {
        return Estimate.of(value.get());
    }
    return Estimate.unknown();
}
// Selects which ColumnStatistics fields testColumnStats compares between actual and expected.
private enum ColumnStatisticsFields
{
// compares getDistinctValuesCount()
DistinctValuesCount,
// compares getDataSize()
DataSize,
// compares getNullsFraction()
NullsFraction,
// compares getRange()
Range
}
}