// TestStatsNormalizer.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.cost;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.metadata.FunctionAndTypeManager;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.testing.TestingConnectorSession;
import com.google.common.collect.ImmutableList;
import org.testng.annotations.Test;
import java.time.LocalDate;
import java.util.Optional;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.DateType.DATE;
import static com.facebook.presto.common.type.DecimalType.createDecimalType;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.cost.StatsUtil.toStatsRepresentation;
import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;
import static java.lang.Double.NaN;
import static java.util.Collections.emptyList;
/**
 * Unit tests for {@link StatsNormalizer#normalize}.
 *
 * <p>Each test builds a {@link PlanNodeStatsEstimate}, runs it through the normalizer and
 * checks the resulting total size and per-variable distinct-values counts.
 */
public class TestStatsNormalizer
{
    // Row count and total size used by the range-length tests. The row count is far larger
    // than every NDV passed to those tests.
    private static final long LARGE_ROW_COUNT = 10000000000L;
    private static final long LARGE_TOTAL_SIZE = 40000000000L;

    private final FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    private final ConnectorSession session = new TestingConnectorSession(emptyList());
    private final StatsNormalizer normalizer = new StatsNormalizer();

    /**
     * An NDV of 20 with 30 output rows is expected to come back unchanged, as is the total size.
     */
    @Test
    public void testNoCapping()
    {
        VariableReferenceExpression a = bigintVariable("a");
        VariableStatsEstimate aStats = VariableStatsEstimate.builder()
                .setDistinctValuesCount(20)
                .build();

        PlanNodeStatsEstimate input = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(30)
                .setTotalSize(120)
                .addVariableStatistics(a, aStats)
                .build();

        PlanNodeStatsAssertion normalized = assertNormalized(input);
        normalized.totalSize(120)
                .variableStats(a, stats -> stats.distinctValuesCount(20));
    }

    /**
     * Normalizing with output variables (b, c) is expected to leave b as the only variable
     * with known statistics: a is not in the output list and c's statistics are unknown.
     */
    @Test
    public void testDropNonOutputSymbols()
    {
        VariableReferenceExpression a = bigintVariable("a");
        VariableReferenceExpression b = bigintVariable("b");
        VariableReferenceExpression c = bigintVariable("c");

        VariableStatsEstimate aStats = VariableStatsEstimate.builder().setDistinctValuesCount(20).build();
        VariableStatsEstimate bStats = VariableStatsEstimate.builder().setDistinctValuesCount(30).build();

        PlanNodeStatsEstimate input = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(40)
                .setTotalSize(160)
                .addVariableStatistics(a, aStats)
                .addVariableStatistics(b, bStats)
                .addVariableStatistics(c, VariableStatsEstimate.unknown())
                .build();

        PlanNodeStatsEstimate normalized = normalizer.normalize(input, ImmutableList.of(b, c));

        PlanNodeStatsAssertion.assertThat(normalized)
                .totalSize(160)
                .variablesWithKnownStats(b)
                .variableStats(b, stats -> stats.distinctValuesCount(30));
    }

    /**
     * With 10 output rows, an NDV of 20 is expected to become 10 when the nulls fraction is 0
     * and 8 when the nulls fraction is 0.4; an unknown NDV is expected to stay unknown.
     */
    @Test
    public void tesCapDistinctValuesByOutputRowCount()
    {
        VariableReferenceExpression a = bigintVariable("a");
        VariableReferenceExpression b = bigintVariable("b");
        VariableReferenceExpression c = bigintVariable("c");

        VariableStatsEstimate noNulls = VariableStatsEstimate.builder()
                .setNullsFraction(0)
                .setDistinctValuesCount(20)
                .build();
        VariableStatsEstimate someNulls = VariableStatsEstimate.builder()
                .setNullsFraction(0.4)
                .setDistinctValuesCount(20)
                .build();

        PlanNodeStatsEstimate input = PlanNodeStatsEstimate.builder()
                .addVariableStatistics(a, noNulls)
                .addVariableStatistics(b, someNulls)
                .addVariableStatistics(c, VariableStatsEstimate.unknown())
                .setOutputRowCount(10)
                .setTotalSize(40)
                .build();

        PlanNodeStatsAssertion normalized = assertNormalized(input);
        normalized.totalSize(40)
                .variableStats(a, stats -> stats.distinctValuesCount(10))
                .variableStats(b, stats -> stats.distinctValuesCount(8))
                .variableStats(c, stats -> stats.distinctValuesCountUnknown());
    }

    /**
     * Runs the range-length scenario for several types; each call lists
     * (type, input NDV, low value, high value, expected NDV after normalization).
     */
    @Test
    public void testCapDistinctValuesByToDomainRangeLength()
    {
        // Integral types
        testCapDistinctValuesByToDomainRangeLength(INTEGER, 15, 1, 5, 5);
        // 20 billion as a double literal
        testCapDistinctValuesByToDomainRangeLength(INTEGER, 20_000_000_000., 1, 1_000_000_000, 1_000_000_000);
        testCapDistinctValuesByToDomainRangeLength(INTEGER, 3, 1, 5, 3);
        testCapDistinctValuesByToDomainRangeLength(INTEGER, NaN, 1, 5, NaN);
        testCapDistinctValuesByToDomainRangeLength(BIGINT, 15, 1, 5, 5);
        testCapDistinctValuesByToDomainRangeLength(SMALLINT, 15, 1, 5, 5);
        testCapDistinctValuesByToDomainRangeLength(TINYINT, 15, 1, 5, 5);

        // Decimals
        testCapDistinctValuesByToDomainRangeLength(createDecimalType(10, 2), 11, 1, 1, 1);
        testCapDistinctValuesByToDomainRangeLength(createDecimalType(10, 2), 13, 101, 103, 3);
        testCapDistinctValuesByToDomainRangeLength(createDecimalType(10, 2), 10, 100, 200, 10);

        // Doubles
        testCapDistinctValuesByToDomainRangeLength(DOUBLE, 42, 10.1, 10.2, 42);
        testCapDistinctValuesByToDomainRangeLength(DOUBLE, 42, 10.1, 10.1, 1);

        // Booleans
        testCapDistinctValuesByToDomainRangeLength(BOOLEAN, 11, true, true, 1);
        testCapDistinctValuesByToDomainRangeLength(BOOLEAN, 12, false, true, 2);

        // Dates, passed as epoch days
        long firstDay = LocalDate.of(2017, 8, 31).toEpochDay();
        long lastDay = LocalDate.of(2017, 9, 2).toEpochDay();
        testCapDistinctValuesByToDomainRangeLength(DATE, 12, firstDay, lastDay, 3);
    }

    /**
     * Builds statistics for a single variable of the given type with the supplied NDV and
     * [low, high] bounds, normalizes them, and asserts the resulting NDV.
     *
     * @param type type of the variable under test
     * @param ndv distinct-values count fed to the normalizer
     * @param low lower bound, converted with {@link #asStatsValue}
     * @param high upper bound, converted with {@link #asStatsValue}
     * @param expectedNormalizedNdv distinct-values count expected after normalization
     */
    private void testCapDistinctValuesByToDomainRangeLength(Type type, double ndv, Object low, Object high, double expectedNormalizedNdv)
    {
        VariableReferenceExpression x = new VariableReferenceExpression(Optional.empty(), "x", type);

        VariableStatsEstimate.Builder statsBuilder = VariableStatsEstimate.builder();
        statsBuilder.setNullsFraction(0);
        statsBuilder.setDistinctValuesCount(ndv);
        statsBuilder.setLowValue(asStatsValue(low, type));
        statsBuilder.setHighValue(asStatsValue(high, type));
        VariableStatsEstimate xStats = statsBuilder.build();

        PlanNodeStatsEstimate input = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(LARGE_ROW_COUNT)
                .setTotalSize(LARGE_TOTAL_SIZE)
                .addVariableStatistics(x, xStats)
                .build();

        assertNormalized(input)
                .totalSize(LARGE_TOTAL_SIZE)
                .variableStats(x, stats -> stats.distinctValuesCount(expectedNormalizedNdv));
    }

    /**
     * An estimate with an unknown (NaN) row count and a total size of 120 is expected to keep
     * both after normalization.
     */
    @Test
    public void testTotalSizeOnlyMaintainsTotalSize()
    {
        PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder();
        builder.setOutputRowCount(NaN);
        builder.setTotalSize(120);

        assertNormalized(builder.build())
                .outputRowsCountUnknown()
                .totalSize(120);
    }

    /**
     * Normalizes {@code estimate} using its own variables with known statistics as the output
     * variables and wraps the result in an assertion object.
     */
    private PlanNodeStatsAssertion assertNormalized(PlanNodeStatsEstimate estimate)
    {
        return PlanNodeStatsAssertion.assertThat(
                normalizer.normalize(estimate, estimate.getVariablesWithKnownStatistics()));
    }

    /**
     * Converts {@code value} to its statistics representation for {@code type}, yielding NaN
     * when no representation is available.
     */
    private double asStatsValue(Object value, Type type)
    {
        return toStatsRepresentation(functionAndTypeManager, session, type, value)
                .orElse(NaN);
    }

    /**
     * Creates a BIGINT variable reference with the given name and no source location.
     */
    private static VariableReferenceExpression bigintVariable(String name)
    {
        return new VariableReferenceExpression(Optional.empty(), name, BIGINT);
    }
}