UnnestStatsRule.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.cost;
import com.facebook.presto.Session;
import com.facebook.presto.cost.ComposableStatsCalculator.Rule;
import com.facebook.presto.matching.Pattern;
import com.facebook.presto.spi.plan.UnnestNode;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.planner.TypeProvider;
import com.facebook.presto.sql.planner.iterative.Lookup;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.facebook.presto.sql.planner.plan.Patterns.unnest;
public class UnnestStatsRule
implements Rule<UnnestNode>
{
private static final int UPPER_BOUND_ROW_COUNT_FOR_ESTIMATION = 1;
@Override
public Pattern<UnnestNode> getPattern()
{
return unnest();
}
@Override
public Optional<PlanNodeStatsEstimate> calculate(UnnestNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource());
PlanNodeStatsEstimate.Builder calculatedStats = PlanNodeStatsEstimate.builder();
if (sourceStats.getOutputRowCount() > UPPER_BOUND_ROW_COUNT_FOR_ESTIMATION) {
return Optional.empty();
}
// Since we don't have stats for cardinality about the unnest column, we cannot estimate the row count.
// However, when the source row count is low, the error would not matter much in query optimization.
// Thus we'd still populate the inaccurate numbers just so stats are populated to enable optimization
// potential.
calculatedStats.setOutputRowCount(sourceStats.getOutputRowCount());
for (VariableReferenceExpression variable : node.getReplicateVariables()) {
calculatedStats.addVariableStatistics(variable, sourceStats.getVariableStatistics(variable));
}
for (Map.Entry<VariableReferenceExpression, List<VariableReferenceExpression>> entry : node.getUnnestVariables().entrySet()) {
List<VariableReferenceExpression> unnestToVariables = entry.getValue();
VariableStatsEstimate stats = sourceStats.getVariableStatistics(entry.getKey());
for (VariableReferenceExpression variable : unnestToVariables) {
// This is a very conservative way on estimating stats after unnest. We assume each symbol
// after unnest would have as much data as the symbol before unnest. This would over
// estimate, which are more likely to mean we'd loose an optimization opportunity, but at
// least it won't cause false optimizations.
calculatedStats.addVariableStatistics(
variable,
VariableStatsEstimate.builder()
.setAverageRowSize(stats.getAverageRowSize())
.build());
}
}
if (node.getOrdinalityVariable().isPresent()) {
calculatedStats.addVariableStatistics(
node.getOrdinalityVariable().get(),
VariableStatsEstimate.builder()
.setLowValue(0)
.setNullsFraction(0)
.build());
}
return Optional.of(calculatedStats.build());
}
}