TestLogicalCteOptimizer.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql.planner.optimizations;
import com.facebook.presto.Session;
import com.facebook.presto.spi.WarningCollector;
import com.facebook.presto.spi.plan.CteConsumerNode;
import com.facebook.presto.spi.plan.CteProducerNode;
import com.facebook.presto.spi.plan.CteReferenceNode;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.sql.Optimizer;
import com.facebook.presto.sql.planner.Plan;
import com.facebook.presto.sql.planner.assertions.BasePlanTest;
import com.facebook.presto.sql.planner.assertions.PlanAssert;
import com.facebook.presto.sql.planner.assertions.PlanMatchPattern;
import com.facebook.presto.sql.planner.plan.SequenceNode;
import com.google.common.collect.ImmutableList;
import org.testng.annotations.Test;
import java.util.List;
import java.util.function.Consumer;
import static com.facebook.presto.SystemSessionProperties.CTE_HEURISTIC_REPLICATION_THRESHOLD;
import static com.facebook.presto.SystemSessionProperties.CTE_MATERIALIZATION_STRATEGY;
import static com.facebook.presto.sql.planner.SqlPlannerContext.CteInfo.delimiter;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.cteConsumer;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.cteProducer;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.join;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.lateral;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.sequence;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.tableScan;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values;
import static org.testng.Assert.assertFalse;
public class TestLogicalCteOptimizer
extends BasePlanTest
{
private static final List<Class<? extends PlanNode>> CTE_PLAN_NODES = ImmutableList.of(CteReferenceNode.class, CteConsumerNode.class, CteProducerNode.class, SequenceNode.class);
@Test
public void testConvertSimpleCte()
{
assertUnitPlan("WITH temp as (SELECT orderkey FROM ORDERS) " +
"SELECT * FROM temp t JOIN temp t2 ON true ",
anyTree(
sequence(cteProducer(addQueryScopeDelimiter("temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 0))))));
}
@Test
public void testSimpleRedefinedCteWithSameNameDefinedAgain()
{
assertUnitPlan("WITH \n" +
"test_base AS (SELECT colB FROM (VALUES (1), (2)) AS TempTable(colB)),\n" +
"test AS (\n" +
" \n" +
" WITH test_base as (SELECT colA FROM (VALUES (1), (2)) AS TempTable(colA)),\n" +
" test1 AS (\n" +
" WITH test2 AS(\n" +
" SELECT * FROM test_base\n" +
" )\n" +
" SELECT * FROM test2\n" +
" )\n" +
" SELECT * FROM test1\n" +
")\n" +
"\n" +
"SELECT * FROM test",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("test", 3), anyTree(cteConsumer(addQueryScopeDelimiter("test1", 2)))),
cteProducer(addQueryScopeDelimiter("test1", 2), anyTree(cteConsumer(addQueryScopeDelimiter("test2", 1)))),
cteProducer(addQueryScopeDelimiter("test2", 1), anyTree(cteConsumer(addQueryScopeDelimiter("test_base", 0)))),
cteProducer(addQueryScopeDelimiter("test_base", 0), anyTree(values("colA"))),
anyTree(cteConsumer(addQueryScopeDelimiter("test", 3))))));
}
@Test
public void testSimpleRedefinedCteWithSameName()
{
assertUnitPlan("WITH temp as " +
"( with temp as (SELECT orderkey FROM ORDERS) SELECT * FROM temp) " +
"SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("temp", 1), anyTree(cteConsumer(addQueryScopeDelimiter("temp", 0)))),
cteProducer(addQueryScopeDelimiter("temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 1))))));
}
@Test
public void testComplexRedefinedNestedCtes()
{
assertUnitPlan(
"WITH " +
"cte1 AS ( " +
" SELECT orderkey, totalprice FROM ORDERS WHERE orderkey < 100 " +
"), " +
"cte2 AS ( " +
" WITH cte3 AS ( WITH cte4 AS (SELECT orderkey, totalprice FROM cte1 WHERE totalprice > 1000) SELECT * FROM cte4) " +
" SELECT cte3.orderkey FROM cte3 " +
"), " +
"cte3 AS ( " +
" SELECT * FROM customer WHERE custkey < 50 " +
") " +
"SELECT cte3.*, cte2.orderkey FROM cte3 JOIN cte2 ON cte3.custkey = cte2.orderkey",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("cte3", 0), anyTree(tableScan("customer"))),
cteProducer(addQueryScopeDelimiter("cte2", 4), anyTree(cteConsumer(addQueryScopeDelimiter("cte3", 3)))),
cteProducer(addQueryScopeDelimiter("cte3", 3), anyTree(cteConsumer(addQueryScopeDelimiter("cte4", 2)))),
cteProducer(addQueryScopeDelimiter("cte4", 2), anyTree(cteConsumer(addQueryScopeDelimiter("cte1", 1)))),
cteProducer(addQueryScopeDelimiter("cte1", 1), anyTree(tableScan("orders"))),
anyTree(
join(
anyTree(cteConsumer(addQueryScopeDelimiter("cte3", 0))),
anyTree(cteConsumer(addQueryScopeDelimiter("cte2", 4))))))));
}
@Test
public void testRedefinedCteConflictingNamesInDifferentScope()
{
assertUnitPlan("WITH test AS (SELECT colA FROM (VALUES (1), (2)) AS TempTable(colA)),\n" +
" _query AS (\n" +
" with test AS (\n" +
" SELECT * FROM test\n" +
" )\n" +
" SELECT * FROM test\n" +
" )\n" +
" SELECT * FROM _query",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("_query", 2), anyTree(cteConsumer(addQueryScopeDelimiter("test", 1)))),
cteProducer(addQueryScopeDelimiter("test", 1), anyTree(cteConsumer(addQueryScopeDelimiter("test", 0)))),
cteProducer(addQueryScopeDelimiter("test", 0), anyTree(values("colA"))),
anyTree(cteConsumer(addQueryScopeDelimiter("_query", 2))))));
}
@Test
public void testCtesDefinedInEntirelyDifferentScope()
{
// From clause is visited first
assertUnitPlan("SELECT \n" +
" *, (WITH T as (SELECT colA FROM (VALUES (1), (2)) AS TempTable(colA)) SELECT * FROM T)\n" +
"FROM (\n" +
" WITH T AS ( \n" +
" SELECT ColumnA, ColumnB FROM (\n" +
" VALUES \n" +
" (1, 'A'),\n" +
" (2, 'B'),\n" +
" (3, 'C'),\n" +
" (4, 'D')\n" +
" ) AS TempTable(ColumnA, ColumnB)\n" +
" )\n" +
" SELECT * FROM T JOIN T ON TRUE" +
")",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("T", 0), anyTree(values("ColumnA", "ColumnB"))),
cteProducer(addQueryScopeDelimiter("T", 1), anyTree(values("colA"))),
anyTree(lateral(ImmutableList.of(),
anyTree(join(anyTree(cteConsumer(addQueryScopeDelimiter("T", 0))), anyTree(cteConsumer(addQueryScopeDelimiter("T", 0))))),
anyTree(cteConsumer(addQueryScopeDelimiter("T", 1))))))));
}
@Test
public void testNestedCtesReused()
{
assertUnitPlan("WITH cte1 AS ( WITH cte2 as (SELECT orderkey FROM ORDERS WHERE orderkey < 100)" +
"SELECT * FROM cte2)" +
"SELECT * FROM cte1 JOIN cte1 ON true",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("cte1", 1), anyTree(cteConsumer(addQueryScopeDelimiter("cte2", 0)))),
cteProducer(addQueryScopeDelimiter("cte2", 0), anyTree(tableScan("orders"))),
anyTree(join(anyTree(cteConsumer(addQueryScopeDelimiter("cte1", 1))), anyTree(cteConsumer(addQueryScopeDelimiter("cte1", 1))))))));
}
@Test
public void testRedefinedCtesInDifferentScope()
{
assertUnitPlan("WITH cte1 AS ( WITH cte2 as (SELECT orderkey FROM ORDERS WHERE orderkey < 100)" +
"SELECT * FROM cte2), " +
" cte2 AS (SELECT * FROM customer WHERE custkey < 50) " +
"SELECT * FROM cte2 JOIN cte1 ON true",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("cte2", 0), anyTree(tableScan("customer"))),
cteProducer(addQueryScopeDelimiter("cte1", 2), anyTree(cteConsumer(addQueryScopeDelimiter("cte2", 1)))),
cteProducer(addQueryScopeDelimiter("cte2", 1), anyTree(tableScan("orders"))),
anyTree(join(anyTree(cteConsumer(addQueryScopeDelimiter("cte2", 0))), anyTree(cteConsumer(addQueryScopeDelimiter("cte1", 2))))))));
}
@Test
public void testNestedCte()
{
assertUnitPlan("WITH temp1 as (SELECT orderkey FROM ORDERS), " +
" temp2 as (SELECT * FROM temp1) " +
"SELECT * FROM temp2",
anyTree(
sequence(cteProducer(addQueryScopeDelimiter("temp2", 1), anyTree(cteConsumer(addQueryScopeDelimiter("temp1", 0)))),
cteProducer(addQueryScopeDelimiter("temp1", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp2", 1))))));
}
@Test
public void testMultipleIndependentCtes()
{
assertUnitPlan("WITH temp1 as (SELECT orderkey FROM ORDERS), " +
" temp2 as (SELECT custkey FROM CUSTOMER) " +
"SELECT * FROM temp1, temp2",
anyTree(
sequence(cteProducer(addQueryScopeDelimiter("temp1", 0), anyTree(tableScan("orders"))),
cteProducer(addQueryScopeDelimiter("temp2", 1), anyTree(tableScan("customer"))),
anyTree(join(anyTree(cteConsumer(addQueryScopeDelimiter("temp1", 0))), anyTree(cteConsumer(addQueryScopeDelimiter("temp2", 1))))))));
}
@Test
public void testDependentCtes()
{
assertUnitPlan("WITH temp1 as (SELECT orderkey FROM ORDERS), " +
" temp2 as (SELECT orderkey FROM temp1) " +
"SELECT * FROM temp2 , temp1",
anyTree(
sequence(cteProducer(addQueryScopeDelimiter("temp2", 1), anyTree(cteConsumer(addQueryScopeDelimiter("temp1", 0)))),
cteProducer(addQueryScopeDelimiter("temp1", 0), anyTree(tableScan("orders"))),
anyTree(join(anyTree(cteConsumer(addQueryScopeDelimiter("temp2", 1))), anyTree(cteConsumer(addQueryScopeDelimiter("temp1", 0))))))));
}
@Test
public void testComplexCteWithJoins()
{
assertUnitPlan(
"WITH cte_orders AS (SELECT orderkey, custkey FROM ORDERS), " +
" cte_line_item AS (SELECT l.orderkey, l.suppkey FROM lineitem l JOIN cte_orders o ON l.orderkey = o.orderkey) " +
"SELECT li.orderkey, s.suppkey, s.name FROM cte_line_item li JOIN SUPPLIER s ON li.suppkey = s.suppkey",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("cte_line_item", 1),
anyTree(
join(
anyTree(tableScan("lineitem")),
anyTree(cteConsumer(addQueryScopeDelimiter("cte_orders", 0)))))),
cteProducer(addQueryScopeDelimiter("cte_orders", 0), anyTree(tableScan("orders"))),
anyTree(
join(
anyTree(cteConsumer(addQueryScopeDelimiter("cte_line_item", 1))),
anyTree(tableScan("supplier")))))));
}
@Test
public void testSimplePersistentCteWithRowTypeAndNonRowType()
{
assertUnitPlan("WITH temp AS (" +
" SELECT * FROM (VALUES " +
" (CAST(ROW('example_status', 100) AS ROW(status VARCHAR, amount INTEGER)), 1)," +
" (CAST(ROW('another_status', 200) AS ROW(status VARCHAR, amount INTEGER)), 2)" +
" ) AS t (order_details, orderkey)" +
") SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("temp", 0), anyTree(values("status", "amount"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 0))))));
}
// ToDo prestodb/21791: write with 0 length varchar fails in hive
// See reference - While Presto supports Varchar of length 0 (as discussed in trinodb/trino#1136
@Test(enabled = false)
public void testNoPersistentCteWithZeroLengthVarcharType()
{
assertUnitPlan("WITH temp AS (" +
" SELECT * FROM (VALUES " +
" (CAST('' AS VARCHAR(0)), 9)" +
" ) AS t (text_column, number_column)" +
") SELECT * FROM temp",
anyTree(values("text_column", "number_column")));
}
@Test
public void testHeuristicComplexCteMaterialization()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
" SELECT orderkey FROM ORDERS GROUP BY orderkey" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 0))))));
}
@Test
public void testHeuristicComplexCteMaterializationForInnerCtes()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
"With inner_temp AS( " +
" SELECT orderkey FROM ORDERS GROUP BY orderkey)" +
"SELECT * FROM inner_temp" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("inner_temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("inner_temp", 0))))));
}
@Test
public void testNoHeuristicCteMaterializationForInnerCtes()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
"With inner_temp AS( " +
" SELECT orderkey FROM ORDERS GROUP BY orderkey)" +
"SELECT * FROM inner_temp GROUP BY orderkey" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("temp", 1), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 1))))));
}
@Test
public void testHeuristicComplexCteMaterializationForBothCtes()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (\n" +
" WITH inner_temp AS (\n" +
" SELECT orderkey\n" +
" FROM ORDERS\n" +
" GROUP BY orderkey\n" +
" )\n" +
" SELECT * FROM inner_temp GROUP BY orderkey \n" +
" UNION\n" +
" SELECT * FROM inner_temp GROUP BY orderkey\n" +
" UNION\n" +
" SELECT * FROM inner_temp GROUP BY orderkey\n" +
" UNION\n" +
" SELECT * FROM inner_temp GROUP BY orderkey\n" +
")\n" +
"SELECT * FROM temp\n" +
"UNION\n" +
"SELECT * FROM temp\n" +
"UNION\n" +
"SELECT * FROM temp\n" +
"UNION\n" +
"SELECT * FROM temp",
anyTree(
// both materialized
sequence(
cteProducer(addQueryScopeDelimiter("temp", 1), anyTree(cteConsumer(addQueryScopeDelimiter("inner_temp", 0)))),
cteProducer(addQueryScopeDelimiter("inner_temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 1))))));
}
@Test
public void testHeuristicMaterializationWithMultipleNestedCtesAllMaterialized()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "2")
.build(),
"WITH outer_temp AS (\n" +
" WITH mid_temp AS (\n" +
" WITH inner_temp AS (\n" +
" SELECT orderkey, COUNT(*) as total_orders FROM ORDERS GROUP BY orderkey\n" +
" )\n" +
" SELECT orderkey FROM inner_temp WHERE total_orders > 1\n" +
" UNION ALL \n" +
" SELECT orderkey FROM inner_temp\n" +
" )\n" +
" SELECT orderkey FROM mid_temp\n" +
" UNION ALL \n" +
" SELECT orderkey FROM mid_temp\n" +
")\n" +
"SELECT * FROM outer_temp \n" +
"UNION ALL " +
"SELECT * FROM outer_temp \n" +
"UNION ALL " +
"SELECT * FROM outer_temp \n",
anyTree(
// Expect specific materializations based on the threshold and complexity
sequence(
cteProducer(addQueryScopeDelimiter("outer_temp", 2), anyTree(cteConsumer(addQueryScopeDelimiter("mid_temp", 1)))),
cteProducer(addQueryScopeDelimiter("mid_temp", 1), anyTree(cteConsumer(addQueryScopeDelimiter("inner_temp", 0)))),
cteProducer(addQueryScopeDelimiter("inner_temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("outer_temp", 2))))));
}
@Test
public void testHeuristicMaterializationWithMultipleNestedCtesWhereInnerNotMaterialized()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "3")
.build(),
"WITH outer_temp AS (\n" +
" WITH mid_temp AS (\n" +
" WITH inner_temp AS (\n" +
" SELECT orderkey, COUNT(*) as total_orders FROM ORDERS GROUP BY orderkey\n" +
" )\n" +
" SELECT orderkey FROM inner_temp WHERE total_orders > 1\n" +
" UNION ALL \n" +
" SELECT orderkey FROM inner_temp\n" +
" )\n" +
" SELECT orderkey FROM mid_temp\n" +
" UNION ALL \n" +
" SELECT orderkey FROM mid_temp\n" +
" UNION ALL \n" +
" SELECT orderkey FROM mid_temp\n" +
")" +
"SELECT * FROM outer_temp \n" +
"UNION ALL " +
"SELECT * FROM outer_temp \n" +
"UNION ALL " +
"SELECT * FROM outer_temp \n",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("outer_temp", 2), anyTree(cteConsumer(addQueryScopeDelimiter("mid_temp", 1)))),
cteProducer(addQueryScopeDelimiter("mid_temp", 1), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("outer_temp", 2))))));
}
@Test
public void testHeuristicMaterializationWithDifferentPaths()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "3")
.build(),
"WITH first_cte AS (\n" +
" SELECT orderkey FROM ORDERS\n" +
"), second_cte AS (\n" +
" SELECT orderkey FROM first_cte\n" +
"), third_cte AS (\n" +
" SELECT orderkey FROM second_cte\n" +
" UNION ALL \n" +
" SELECT orderkey FROM first_cte \n" +
")\n" +
"SELECT * FROM third_cte \n" +
"UNION ALL\n" +
"SELECT * FROM second_cte \n" +
"UNION ALL\n" +
"SELECT * FROM first_cte \n",
anyTree(
// Check the materialization decisions based on usage across different paths
sequence(
cteProducer(addQueryScopeDelimiter("first_cte", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("first_cte", 0))))));
}
@Test
public void testHeuristicMaterializationWithDeepNestedCteUsage()
{
// Heuristic Threshold = 3
//CTE b is used 2 times in a.
//CTE a is used 2 times in y.
//CTE y is used 4 times in x.
//CTE x is used 3 times in the main query.
//Hence we expect cte x,y and b to be materialized
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "3")
.build(),
"WITH b AS (\n" +
" SELECT orderkey FROM ORDERS\n" +
"),\n" +
"a AS (\n" +
" SELECT orderkey FROM b\n" +
" UNION ALL\n" +
" SELECT orderkey FROM b\n" +
"),\n" +
"y AS (\n" +
" SELECT orderkey FROM a\n" +
" UNION ALL\n" +
" SELECT orderkey FROM a\n" +
"),\n" +
"x AS (\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
")\n" +
"SELECT * FROM x\n" +
"UNION ALL\n" +
"SELECT * FROM x\n" +
"UNION ALL\n" +
"SELECT * FROM x\n",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("x", 3), anyTree(cteConsumer(addQueryScopeDelimiter("y", 2)))),
cteProducer(addQueryScopeDelimiter("y", 2), anyTree(cteConsumer(addQueryScopeDelimiter("b", 0)))),
cteProducer(addQueryScopeDelimiter("b", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("x", 3))))));
}
@Test
public void testHeuristicMaterializationWithDeepNestedCteUsage2()
{
// Heuristic Threshold = 5
//CTE b is used 2 times in a.
//CTE a is used 2 times in y.
//CTE y is used 4 times in x.
//CTE x is used 3 times in the main query.
// Hence we expect cte y to be materialized
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "5")
.build(),
"WITH b AS (\n" +
" SELECT orderkey FROM ORDERS\n" +
"),\n" +
"a AS (\n" +
" SELECT orderkey FROM b\n" +
" UNION ALL\n" +
" SELECT orderkey FROM b\n" +
"),\n" +
"y AS (\n" +
" SELECT orderkey FROM a\n" +
" UNION ALL\n" +
" SELECT orderkey FROM a\n" +
"),\n" +
"x AS (\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
" UNION ALL\n" +
" SELECT orderkey FROM y\n" +
")\n" +
"SELECT * FROM x \n" +
"UNION ALL\n" +
"SELECT * FROM x\n" +
"UNION ALL\n" +
"SELECT * FROM x\n",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("y", 2), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("y", 2))))));
}
@Test
public void testHeuristicMaterializationWithDeepNestedCteUsage3()
{
// Heuristic Threshold = 3
//CTE b is used 1 times in a and 1 time in c.
//CTE c is used 1 times in the main query.
//CTE a is used 3 times in the main query.
//Hence we expect: cte a to be materialized
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "3")
.build(),
"WITH b AS (\n" +
" SELECT orderkey FROM ORDERS\n" +
"),\n" +
"a AS (\n" +
" SELECT orderkey FROM b\n" +
"),\n" +
"c as ( SELECT orderkey FROM b)\n" +
"SELECT * FROM c\n" +
"UNION ALL\n" +
"SELECT * FROM a\n" +
"UNION ALL\n" +
"SELECT * FROM a\n" +
"UNION ALL\n" +
"SELECT * FROM a\n",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("a", 2), anyTree(tableScan("orders"))),
anyTree(PlanMatchPattern.union(
PlanMatchPattern.union(
PlanMatchPattern.union(
anyTree(tableScan("orders")), anyTree(cteConsumer(addQueryScopeDelimiter("a", 2)))),
anyTree(cteConsumer(addQueryScopeDelimiter("a", 2)))),
anyTree(cteConsumer(addQueryScopeDelimiter("a", 2))))))));
}
@Test
public void testNoHeuristicComplexCteMaterializationWithoutComplexNodes()
{
assertUnitPlanWithValidator(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
" SELECT orderkey FROM ORDERS" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp",
anyTree(tableScan("orders")),
plan ->
assertFalse(PlanNodeSearcher.searchFrom(plan.getRoot())
.where(planNode -> CTE_PLAN_NODES.stream().anyMatch(clazz -> clazz.isInstance(planNode)))
.matches()));
}
@Test
public void testNoHeuristicComplexCteMaterializationWithoutDataNodes()
{
assertUnitPlanWithValidator(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC_COMPLEX_QUERIES_ONLY")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "0")
.build(),
"WITH temp AS (" +
" SELECT colB FROM (VALUES (1), (2)) AS TempTable(colB)" +
")" +
"SELECT * FROM temp ",
anyTree(values("colB")),
plan ->
assertFalse(PlanNodeSearcher.searchFrom(plan.getRoot())
.where(planNode -> CTE_PLAN_NODES.stream().anyMatch(clazz -> clazz.isInstance(planNode)))
.matches()));
}
@Test
public void testHeuristicCteMaterialization()
{
assertUnitPlan(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
" SELECT orderkey FROM ORDERS" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp",
anyTree(
sequence(
cteProducer(addQueryScopeDelimiter("temp", 0), anyTree(tableScan("orders"))),
anyTree(cteConsumer(addQueryScopeDelimiter("temp", 0))))));
}
@Test
public void testNoHeuristicCteMaterializationWithLesserReferences()
{
assertUnitPlanWithValidator(
Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "HEURISTIC")
.setSystemProperty(CTE_HEURISTIC_REPLICATION_THRESHOLD, "4")
.build(),
"WITH temp AS (" +
" SELECT orderkey FROM ORDERS" +
")" +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp " +
"UNION " +
"SELECT * FROM temp ",
anyTree(tableScan("orders")),
plan ->
assertFalse(PlanNodeSearcher.searchFrom(plan.getRoot())
.where(planNode -> CTE_PLAN_NODES.stream().anyMatch(clazz -> clazz.isInstance(planNode)))
.matches()));
}
public static String addQueryScopeDelimiter(String cteName, int scope)
{
return String.valueOf(scope) + delimiter + cteName;
}
private void assertUnitPlan(String sql, PlanMatchPattern pattern)
{
assertUnitPlan(getSession(), sql, pattern);
}
private void assertUnitPlan(Session session, String sql, PlanMatchPattern pattern)
{
List<PlanOptimizer> optimizers = ImmutableList.of(
new LogicalCteOptimizer(getQueryRunner().getMetadata()));
assertPlan(sql, session, Optimizer.PlanStage.OPTIMIZED, pattern, optimizers);
}
private void assertUnitPlanWithValidator(Session session, String sql, PlanMatchPattern pattern, Consumer<Plan> planValidator)
{
List<PlanOptimizer> optimizers = ImmutableList.of(
new LogicalCteOptimizer(getQueryRunner().getMetadata()));
getQueryRunner().inTransaction(session, transactionSession -> {
Plan actualPlan = getQueryRunner().createPlan(
transactionSession,
sql,
optimizers,
Optimizer.PlanStage.OPTIMIZED,
WarningCollector.NOOP);
PlanAssert.assertPlan(transactionSession, getQueryRunner().getMetadata(), getQueryRunner().getStatsCalculator(), actualPlan, pattern);
planValidator.accept(actualPlan);
return null;
});
}
private Session getSession()
{
return Session.builder(this.getQueryRunner().getDefaultSession())
.setSystemProperty(CTE_MATERIALIZATION_STRATEGY, "ALL")
.build();
}
}