// TestRowNumberStatsRule.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.cost;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.google.common.collect.ImmutableList;
import org.testng.annotations.Test;
import java.util.Optional;
import static com.facebook.presto.common.type.BigintType.BIGINT;
/**
 * Checks the statistics produced for plan nodes built with the plan builder's
 * {@code rowNumber(...)} call, using the {@code tester()} harness inherited from
 * {@link BaseStatsCalculatorTest}.
 *
 * <p>Every scenario feeds source stats for two BIGINT variables, {@code x} and {@code y},
 * and inspects the output row count and/or the stats of the generated row number
 * variable {@code z}.
 */
public class TestRowNumberStatsRule
        extends BaseStatsCalculatorTest
{
    /** Source stats for {@code x}: 5 distinct values, no nulls. */
    private final VariableStatsEstimate xStats = VariableStatsEstimate.builder()
            .setDistinctValuesCount(5.0)
            .setNullsFraction(0)
            .build();

    /** Source stats for {@code y}: 5 distinct values, half of the rows null. */
    private final VariableStatsEstimate yStats = VariableStatsEstimate.builder()
            .setDistinctValuesCount(5.0)
            .setNullsFraction(0.5)
            .build();

    /**
     * Builds a BIGINT variable reference with no source location.
     *
     * @param name variable name, e.g. {@code "x"}
     * @return a new reference equivalent to
     *         {@code new VariableReferenceExpression(Optional.empty(), name, BIGINT)}
     */
    private static VariableReferenceExpression bigintReference(String name)
    {
        return new VariableReferenceExpression(Optional.empty(), name, BIGINT);
    }

    /**
     * Row number partitioned by a single variable: with and without a max-rows-per-partition
     * limit, with a partitioning key that contains nulls, and with an unknown input row count.
     */
    @Test
    public void testSingleGroupingKey()
    {
        // grouping on a key with 0 nulls fraction without max rows per partition limit
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT)),
                        Optional.empty(),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(10)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(check -> check
                        .outputRowsCount(10)
                        // stats of the pass-through variables must be unchanged
                        .variableStats(bigintReference("x"), assertion -> assertion.isEqualTo(xStats))
                        .variableStats(bigintReference("y"), assertion -> assertion.isEqualTo(yStats))
                        .variableStats(bigintReference("z"), assertion -> assertion
                                .lowValue(1)
                                .distinctValuesCount(2)
                                .nullsFraction(0)
                                .averageRowSize(BIGINT.getFixedSize())));

        // grouping on a key with 0 nulls fraction with max rows per partition limit
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT)),
                        Optional.of(1),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(10)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(check -> check
                        .outputRowsCount(5)
                        .variableStats(bigintReference("z"), assertion -> assertion
                                .lowValue(1)
                                .distinctValuesCount(1)
                                .nullsFraction(0)
                                .averageRowSize(BIGINT.getFixedSize())));

        // grouping on a key with non zero nulls fraction
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("y", BIGINT)),
                        Optional.empty(),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(60)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(check -> check
                        .outputRowsCount(60)
                        .variableStats(bigintReference("z"), assertion -> assertion
                                .lowValue(1)
                                .distinctValuesCount(10)
                                .nullsFraction(0)
                                .averageRowSize(BIGINT.getFixedSize())));

        // unknown input row count (no setOutputRowCount on the source stats)
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT)),
                        Optional.of(1),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(PlanNodeStatsAssertion::outputRowsCountUnknown);
    }

    /**
     * Row number partitioned by both {@code x} and {@code y}: estimated group count below and
     * above the input row count, and the case where one key has unknown stats.
     */
    @Test
    public void testMultipleGroupingKeys()
    {
        // grouping on multiple keys with the number of estimated groups less than the row count
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT), pb.variable("y", BIGINT)),
                        Optional.empty(),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(60)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(check -> check
                        .outputRowsCount(60)
                        .variableStats(bigintReference("z"), assertion -> assertion
                                .lowValue(1)
                                .distinctValuesCount(2)
                                .nullsFraction(0)
                                .averageRowSize(BIGINT.getFixedSize())));

        // grouping on multiple keys with the number of estimated groups greater than the row count
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT), pb.variable("y", BIGINT)),
                        Optional.empty(),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(20)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), yStats)
                        .build())
                .check(check -> check
                        .outputRowsCount(20)
                        .variableStats(bigintReference("z"), assertion -> assertion
                                .lowValue(1)
                                .distinctValuesCount(1)
                                .nullsFraction(0)
                                .averageRowSize(BIGINT.getFixedSize())));

        // grouping on multiple keys with stats for one of the keys are unknown
        tester().assertStatsFor(pb -> pb
                .rowNumber(
                        ImmutableList.of(pb.variable("x", BIGINT), pb.variable("y", BIGINT)),
                        Optional.empty(),
                        pb.variable("z", BIGINT),
                        pb.values(pb.variable("x", BIGINT), pb.variable("y", BIGINT))))
                .withSourceStats(0, PlanNodeStatsEstimate.builder()
                        .setOutputRowCount(20)
                        .addVariableStatistics(bigintReference("x"), xStats)
                        .addVariableStatistics(bigintReference("y"), VariableStatsEstimate.unknown())
                        .build())
                .check(PlanNodeStatsAssertion::outputRowsCountUnknown);
    }
}