// TestSimpleFilterProjectSemiJoinStatsRule.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.cost;

import com.facebook.presto.metadata.MetadataManager;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.PlanNodeId;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.TestingRowExpressionTranslator;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.SymbolReference;
import org.testng.annotations.Test;

import java.util.Optional;

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder.expression;
import static com.facebook.presto.sql.planner.plan.AssignmentUtils.identityAssignments;

/**
 * Checks the statistics estimated for plans of the shape
 * {@code Filter -> [Project ->] SemiJoin}, where the semi join matches the
 * left-side variable {@code a} against the right-side variable {@code c} and
 * the filter predicate references the semi-join output variable {@code sjo}.
 *
 * <p>Every test feeds the same source statistics (see {@link #leftSourceStats()}
 * and {@link #rightSourceStats()}) and only varies the plan shape or the filter
 * predicate.
 */
public class TestSimpleFilterProjectSemiJoinStatsRule
        extends BaseStatsCalculatorTest
{
    private static final PlanNodeId LEFT_SOURCE_ID = new PlanNodeId("left_source_values");
    private static final PlanNodeId RIGHT_SOURCE_ID = new PlanNodeId("right_source_values");
    private static final TestingRowExpressionTranslator TRANSLATOR = new TestingRowExpressionTranslator(MetadataManager.createTestMetadataManager());

    // Input statistics of left-side variable "a" (the semi-join source variable).
    private final VariableStatsEstimate aStats = VariableStatsEstimate.builder()
            .setLowValue(0)
            .setHighValue(10)
            .setDistinctValuesCount(10)
            .setNullsFraction(0.1)
            .build();

    // Input statistics of left-side variable "b" (not part of the semi-join condition).
    private final VariableStatsEstimate bStats = VariableStatsEstimate.builder()
            .setLowValue(0)
            .setHighValue(100)
            .setDistinctValuesCount(10)
            .setNullsFraction(0)
            .build();

    // Input statistics of right-side variable "c" (the semi-join filtering variable).
    private final VariableStatsEstimate cStats = VariableStatsEstimate.builder()
            .setLowValue(5)
            .setHighValue(30)
            .setDistinctValuesCount(2)
            .setNullsFraction(0.5)
            .build();

    // Expected statistics of "a" when the filter keeps rows where "sjo" is true.
    private final VariableStatsEstimate expectedAInC = VariableStatsEstimate.builder()
            .setDistinctValuesCount(2)
            .setLowValue(0)
            .setHighValue(10)
            .setNullsFraction(0)
            .build();

    // Expected statistics of "a" for the predicate "sjo AND a < 8".
    private final VariableStatsEstimate expectedAInCWithExtraFilter = VariableStatsEstimate.builder()
            .setDistinctValuesCount(1.6)
            .setLowValue(0)
            .setHighValue(8)
            .setNullsFraction(0)
            .build();

    // Expected statistics of "a" for the predicate "NOT sjo".
    private final VariableStatsEstimate expectedANotInC = VariableStatsEstimate.builder()
            .setDistinctValuesCount(8)
            .setLowValue(0)
            .setHighValue(10)
            .setNullsFraction(0)
            .build();

    /**
     * Filter on {@code sjo} directly above the semi join: {@code a} gets the
     * {@link #expectedAInC} statistics, {@code b} keeps its input statistics.
     */
    @Test
    public void testFilterPositiveSemiJoin()
    {
        getStatsCalculatorAssertion(new SymbolReference("sjo"))
                .withSourceStats(LEFT_SOURCE_ID, leftSourceStats())
                .withSourceStats(RIGHT_SOURCE_ID, rightSourceStats())
                .check(check -> check.outputRowsCount(180)
                        .variableStats(bigintVariable("a"), assertion -> assertion.isEqualTo(expectedAInC))
                        .variableStats(bigintVariable("b"), assertion -> assertion.isEqualTo(bStats))
                        .variableStatsUnknown("c")
                        .variableStatsUnknown("sjo"));
    }

    /**
     * Same filter as {@link #testFilterPositiveSemiJoin()}, but with an identity
     * projection of only {@code sjo} and {@code a} between the filter and the
     * semi join; {@code b} is projected away, so its statistics are expected to
     * be unknown.
     */
    @Test
    public void testFilterPositiveNarrowingProjectSemiJoin()
    {
        tester().assertStatsFor(pb -> {
            VariableReferenceExpression a = pb.variable("a", BIGINT);
            VariableReferenceExpression b = pb.variable("b", BIGINT);
            VariableReferenceExpression c = pb.variable("c", BIGINT);
            VariableReferenceExpression semiJoinOutput = pb.variable("sjo", BOOLEAN);

            PlanNode semiJoinNode = pb.semiJoin(
                    pb.values(LEFT_SOURCE_ID, a, b),
                    pb.values(RIGHT_SOURCE_ID, c),
                    a,
                    c,
                    semiJoinOutput,
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty());

            return pb.filter(
                    TRANSLATOR.translate(expression("sjo"), pb.getTypes()),
                    pb.project(identityAssignments(semiJoinOutput, a), semiJoinNode));
        })
                .withSourceStats(LEFT_SOURCE_ID, leftSourceStats())
                .withSourceStats(RIGHT_SOURCE_ID, rightSourceStats())
                .check(check -> check.outputRowsCount(180)
                        .variableStats(bigintVariable("a"), assertion -> assertion.isEqualTo(expectedAInC))
                        .variableStatsUnknown("b")
                        .variableStatsUnknown("c")
                        .variableStatsUnknown("sjo"));
    }

    /**
     * Positive semi-join filter combined with an additional conjunct on
     * {@code a} ({@code sjo AND a < 8}).
     */
    @Test
    public void testFilterPositivePlusExtraConjunctSemiJoin()
    {
        getStatsCalculatorAssertion(expression("sjo AND a < 8"))
                .withSourceStats(LEFT_SOURCE_ID, leftSourceStats())
                .withSourceStats(RIGHT_SOURCE_ID, rightSourceStats())
                .check(check -> check.outputRowsCount(144)
                        .variableStats(bigintVariable("a"), assertion -> assertion.isEqualTo(expectedAInCWithExtraFilter))
                        .variableStats(bigintVariable("b"), assertion -> assertion.isEqualTo(bStats))
                        .variableStatsUnknown("c")
                        .variableStatsUnknown("sjo"));
    }

    /**
     * Negated semi-join filter ({@code NOT sjo}).
     */
    @Test
    public void testFilterNegativeSemiJoin()
    {
        getStatsCalculatorAssertion(expression("NOT sjo"))
                .withSourceStats(LEFT_SOURCE_ID, leftSourceStats())
                .withSourceStats(RIGHT_SOURCE_ID, rightSourceStats())
                .check(check -> check.outputRowsCount(720)
                        .variableStats(bigintVariable("a"), assertion -> assertion.isEqualTo(expectedANotInC))
                        .variableStats(bigintVariable("b"), assertion -> assertion.isEqualTo(bStats))
                        .variableStatsUnknown("c")
                        .variableStatsUnknown("sjo"));
    }

    /**
     * Builds a stats assertion for a plan that applies {@code expression} as a
     * filter directly on top of the semi join of {@code (a, b)} with {@code c}.
     *
     * @param expression filter predicate; it is translated with {@link #TRANSLATOR}
     *         against the plan builder's types before being placed in the filter node
     * @return the assertion for the built plan, with no source statistics attached yet
     */
    private StatsCalculatorAssertion getStatsCalculatorAssertion(Expression expression)
    {
        return tester().assertStatsFor(pb -> {
            VariableReferenceExpression a = pb.variable("a", BIGINT);
            VariableReferenceExpression b = pb.variable("b", BIGINT);
            VariableReferenceExpression c = pb.variable("c", BIGINT);
            VariableReferenceExpression semiJoinOutput = pb.variable("sjo", BOOLEAN);

            PlanNode semiJoinNode = pb.semiJoin(
                    pb.values(LEFT_SOURCE_ID, a, b),
                    pb.values(RIGHT_SOURCE_ID, c),
                    a,
                    c,
                    semiJoinOutput,
                    Optional.empty(),
                    Optional.empty(),
                    Optional.empty());

            return pb.filter(TRANSLATOR.translate(expression, pb.getTypes()), semiJoinNode);
        });
    }

    /**
     * Statistics supplied for the left (probe-side) values node: 1000 rows with
     * {@link #aStats} for {@code a} and {@link #bStats} for {@code b}.
     */
    private PlanNodeStatsEstimate leftSourceStats()
    {
        return PlanNodeStatsEstimate.builder()
                .setOutputRowCount(1000)
                .addVariableStatistics(bigintVariable("a"), aStats)
                .addVariableStatistics(bigintVariable("b"), bStats)
                .build();
    }

    /**
     * Statistics supplied for the right (filtering-side) values node: 2000 rows
     * with {@link #cStats} for {@code c}.
     */
    private PlanNodeStatsEstimate rightSourceStats()
    {
        return PlanNodeStatsEstimate.builder()
                .setOutputRowCount(2000)
                .addVariableStatistics(bigintVariable("c"), cStats)
                .build();
    }

    /**
     * Creates a {@code BIGINT} variable reference with no source location,
     * used as the key when supplying and looking up variable statistics.
     */
    private static VariableReferenceExpression bigintVariable(String name)
    {
        return new VariableReferenceExpression(Optional.empty(), name, BIGINT);
    }
}