TestEarlyOutJoins.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.sql.planner;

import com.facebook.presto.Session;
import com.facebook.presto.sql.planner.assertions.BasePlanTest;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;

import static com.facebook.presto.SystemSessionProperties.EXPLOIT_CONSTRAINTS;
import static com.facebook.presto.SystemSessionProperties.IN_PREDICATES_AS_INNER_JOINS_ENABLED;
import static com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY;
import static com.facebook.presto.SystemSessionProperties.PUSH_AGGREGATION_BELOW_JOIN_BYTE_REDUCTION_THRESHOLD;
import static com.facebook.presto.spi.plan.JoinType.INNER;
import static com.facebook.presto.sql.analyzer.FeaturesConfig.JoinReorderingStrategy.AUTOMATIC;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.aggregation;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.assignUniqueId;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.equiJoinClause;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.join;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.output;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.project;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.semiJoin;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.tableScan;

public class TestEarlyOutJoins
        extends BasePlanTest
{
    private ImmutableMap<String, String> nationColumns = ImmutableMap.<String, String>builder()
            .put("regionkey", "regionkey")
            .put("nationkey", "nationkey")
            .put("name", "name")
            .put("comment", "comment")
            .build();

    private ImmutableMap<String, String> orderColumns = ImmutableMap.<String, String>builder()
            .put("orderpriority", "orderpriority")
            .put("orderstatus", "orderstatus")
            .put("totalprice", "totalprice")
            .put("orderkey", "orderkey")
            .put("custkey", "custkey")
            .put("orderdate", "orderdate")
            .put("comment", "comment")
            .put("shippriority", "shippriority")
            .put("clerk", "clerk")
            .build();

    public TestEarlyOutJoins()
    {
        super(ImmutableMap.of(EXPLOIT_CONSTRAINTS, Boolean.toString(true),
                IN_PREDICATES_AS_INNER_JOINS_ENABLED, Boolean.toString(true),
                JOIN_REORDERING_STRATEGY, AUTOMATIC.name()));
    }

    @Test
    public void testDistinctInnerRewrite()
    {
        // Rewrite to distinct + inner join and then reorder the join (orders >> nation)

        String query = "select * from nation where nationkey in (select custkey from orders)";
        assertPlan(query,
                output(
                        project(
                                aggregation(ImmutableMap.of(),
                                        anyTree(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                                        anyTree(
                                                                tableScan("orders", ImmutableMap.of("custkey", "custkey"))),
                                                        anyTree(
                                                                assignUniqueId(
                                                                        "unique",
                                                                        tableScan("nation", nationColumns)))))))));
    }

    @Test
    public void testDistinctInnerToLeftEarlyOutRewrite()
    {
        // Rewrite inner join to semi join
        String query = "select distinct o.custkey, o.totalprice from orders o, nation n where o.custkey = n.nationkey";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("custkey", "nationkey", "semijoinvariable$eoj",
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey", "totalprice", "totalprice"))),
                                        anyTree(
                                                tableScan("nation", ImmutableMap.of("nationkey", "nationkey")))))));

        // Negative test - too many columns read from nation
        query = "select distinct o.custkey, o.totalprice, n.nationkey, n.name from orders o, nation n where o.custkey = n.nationkey";
        assertPlan(query,
                output(
                        anyTree(
                                join(INNER,
                                        ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey", "totalprice", "totalprice"))),
                                        anyTree(
                                                tableScan("nation", ImmutableMap.of("nationkey", "nationkey", "name", "name")))))));

        // Trasnform to distinct + inner join and then transform back to semi join
        // The join inputs were not reordered and the join was cardinality reducing
        query = "select * from orders where custkey in (select custkey from customer where name = 'Customer#000156251')";
        assertPlan(query,
                output(
                        project(
                                anyTree(
                                        semiJoin("custkey", "custkey_1", "semijoinvariable$eoj",
                                                anyTree(
                                                        tableScan("orders", orderColumns)),
                                                anyTree(
                                                        tableScan("customer", ImmutableMap.of("custkey_1", "custkey", "name", "name"))))))));
    }

    @Test
    public void testDistinctInnerToRightEarlyOutRewrite()
    {
        // Rewrite to distinct + inner join and then push aggregation below the left input
        // The join inputs were reordered and the join was not cardinality reducing
        String query = "select orderkey from orders where orderkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        join(INNER,
                                ImmutableList.of(equiJoinClause("orderkey_1", "orderkey")),
                                project(
                                        aggregation(ImmutableMap.of(),
                                                tableScan("lineitem", ImmutableMap.of("orderkey_1", "orderkey")))),
                                anyTree(
                                        tableScan("orders", ImmutableMap.of("orderkey", "orderkey"))))));

        Session sessionWithIncreasedByteReductionThreshold = Session.builder(getQueryRunner().getDefaultSession())
                .setSystemProperty(PUSH_AGGREGATION_BELOW_JOIN_BYTE_REDUCTION_THRESHOLD, Double.toString(2))
                .build();

        // Same as previous query, except the aggregation is not pushed below the join since the join is not
        // considered cardinality reducing due to the altered value for parameter EARLY_OUT_JOIN_TRANSFORMATION_BYTE_REDUCTION_THRESHOLD
        assertPlanWithSession(query,
                sessionWithIncreasedByteReductionThreshold,
                false,
                output(
                        anyTree(
                                aggregation(ImmutableMap.of(),
                                        anyTree(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("orderkey_1", "orderkey")),
                                                        anyTree(
                                                                tableScan("lineitem", ImmutableMap.of("orderkey_1", "orderkey"))),
                                                        anyTree(
                                                                assignUniqueId("unique",
                                                                        tableScan("orders", ImmutableMap.of("orderkey", "orderkey"))))))))));

        // Aggregation pushed down the left input of the join, but not the right since the output contains o.custkey (not a join key in the output)
        query = "select distinct l.orderkey, l.partkey, o.custkey from lineitem l, orders o where l.orderkey = o.orderkey";
        assertPlan(query,
                output(
                        anyTree(
                                join(INNER,
                                        ImmutableList.of(equiJoinClause("orderkey", "orderkey_0")),
                                        anyTree(
                                                aggregation(ImmutableMap.of(),
                                                        anyTree(
                                                                tableScan("lineitem", ImmutableMap.of("partkey", "partkey", "orderkey", "orderkey"))))),
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey", "orderkey_0", "orderkey")))))));
    }

    @Test
    public void testAntiJoinScenarios()
    {
        // No NOT's or OR's or CASE's

        String query = "select * from nation where nationkey not in (select custkey from orders)";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("nationkey", "custkey", "expr_9",
                                        anyTree(
                                                tableScan("nation", nationColumns)),
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey")))))));

        query = "select * from nation where nationkey in (select custkey from orders) or nationkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("nationkey", "orderkey_11", "expr_21",
                                        semiJoin("nationkey", "custkey", "expr_9",
                                                anyTree(
                                                        tableScan("nation", nationColumns)),
                                                anyTree(
                                                        tableScan("orders", ImmutableMap.of("custkey", "custkey")))),
                                        anyTree(
                                                tableScan("lineitem", ImmutableMap.of("orderkey_11", "orderkey")))))));

        query = "select * from nation where not (nationkey = any (select custkey from orders))";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("nationkey", "custkey", "expr_9",
                                        anyTree(
                                                tableScan("nation", nationColumns)),
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey")))))));

        query = "select case when (nationkey in (select custkey from orders)) then 1 else 2 end from nation";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("nationkey", "custkey", "expr_9",
                                        anyTree(
                                                tableScan("nation", ImmutableMap.of("nationkey", "nationkey"))),
                                        anyTree(
                                                tableScan("orders", ImmutableMap.of("custkey", "custkey")))))));
    }

    @Test
    void testCombinationsOfSubqueries()
    {
        // Rewrite to distinct + inner join for both subqueries and then find optimal order for the 3-way join
        String query = "select * from nation where nationkey in (select custkey from orders) and nationkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        anyTree(
                                aggregation(ImmutableMap.of(),
                                        anyTree(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("orderkey_9", "nationkey")),
                                                        anyTree(
                                                                tableScan("lineitem", ImmutableMap.of("orderkey_9", "orderkey"))),
                                                        anyTree(
                                                                aggregation(ImmutableMap.of(),
                                                                        project(
                                                                                join(INNER,
                                                                                        ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                                                                        anyTree(
                                                                                                tableScan("orders", ImmutableMap.of("custkey", "custkey"))),
                                                                                        anyTree(assignUniqueId("unique_34",
                                                                                                tableScan("nation", nationColumns)))))))))))));

        // Disjoint conjunctions also transformed similarly
        query = "select * from nation where nationkey in (select custkey from orders) and regionkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        anyTree(
                                aggregation(ImmutableMap.of(),
                                        project(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("orderkey_9", "regionkey")),
                                                        anyTree(
                                                                tableScan("lineitem", ImmutableMap.of("orderkey_9", "orderkey"))),
                                                        assignUniqueId("unique",
                                                                project(
                                                                        aggregation(ImmutableMap.of(),
                                                                                anyTree(
                                                                                        join(INNER,
                                                                                                ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                                                                                anyTree(
                                                                                                        tableScan("orders", ImmutableMap.of("custkey", "custkey"))),
                                                                                                anyTree(
                                                                                                        assignUniqueId("unique_34",
                                                                                                                tableScan("nation", nationColumns))))))))))))));
    }

    @Test
    void testComplexQueries()
    {
        // Subquery produces distinct output -> is rewritten to inner join, inputs are reordered and extraneous aggregations are removed
        String query = "select * from nation where nationkey in (select custkey from orders group by custkey)";
        assertPlan(query,
                output(
                        join(INNER,
                                ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                anyTree(
                                        aggregation(ImmutableMap.of(),
                                                anyTree(
                                                        tableScan("orders", ImmutableMap.of("custkey", "custkey"))))),
                                anyTree(
                                        tableScan("nation", nationColumns)))));

        // In predicates in the having clause
        query = "select nationkey, name from nation having nationkey in (select custkey from orders)";
        assertPlan(query,
                output(
                        project(
                                aggregation(ImmutableMap.of(),
                                        anyTree(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                                        anyTree(
                                                                tableScan("orders", ImmutableMap.of("custkey", "custkey"))),
                                                        anyTree(
                                                                assignUniqueId("unique",
                                                                        tableScan("nation", ImmutableMap.of("nationkey", "nationkey", "name", "name"))))))))));

        query = "select nationkey, name from nation having nationkey in (select custkey from orders) and nationkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        project(
                                aggregation(ImmutableMap.of(),
                                        project(
                                                join(INNER,
                                                        ImmutableList.of(equiJoinClause("orderkey_9", "nationkey")),
                                                        anyTree(
                                                                tableScan("lineitem", ImmutableMap.of("orderkey_9", "orderkey"))),
                                                        assignUniqueId("unique",
                                                                anyTree(
                                                                        aggregation(ImmutableMap.of(),
                                                                                project(
                                                                                        join(INNER,
                                                                                                ImmutableList.of(equiJoinClause("custkey", "nationkey")),
                                                                                                anyTree(
                                                                                                        tableScan("orders", ImmutableMap.of("custkey", "custkey"))),
                                                                                                anyTree(
                                                                                                        assignUniqueId("unique_26",
                                                                                                                tableScan("nation", ImmutableMap.of("nationkey", "nationkey", "name", "name")))))))))))))));

        query = "select nationkey, name from nation having nationkey in (select custkey from orders) or nationkey in (select orderkey from lineitem)";
        assertPlan(query,
                output(
                        anyTree(
                                semiJoin("nationkey", "orderkey_9", "expr_17",
                                        semiJoin("nationkey", "custkey", "expr_7",
                                                anyTree(
                                                        tableScan("nation", ImmutableMap.of("name", "name", "nationkey", "nationkey"))),
                                                anyTree(
                                                        tableScan("orders", ImmutableMap.of("custkey", "custkey")))),
                                        anyTree(
                                                tableScan("lineitem", ImmutableMap.of("orderkey_9", "orderkey")))))));
    }
}