TestSpilledAggregations.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.tests;

import com.facebook.presto.Session;
import com.facebook.presto.testing.QueryRunner;
import org.testng.annotations.Test;

import static com.facebook.presto.SystemSessionProperties.QUERY_MAX_REVOCABLE_MEMORY_PER_NODE;
import static com.facebook.presto.sessionpropertyproviders.JavaWorkerSessionPropertyProvider.AGGREGATION_SPILL_ENABLED;
import static com.facebook.presto.sessionpropertyproviders.JavaWorkerSessionPropertyProvider.DISTINCT_AGGREGATION_SPILL_ENABLED;
import static com.facebook.presto.sessionpropertyproviders.JavaWorkerSessionPropertyProvider.ORDER_BY_AGGREGATION_SPILL_ENABLED;

/**
 * Aggregation tests that extend {@link AbstractTestAggregations} and execute against the
 * query runner produced by {@code TestDistributedSpilledQueries.localCreateQueryRunner()}.
 * The additional cases below target ORDER BY and DISTINCT aggregations and the session
 * properties that toggle spilling for them.
 */
public class TestSpilledAggregations
        extends AbstractTestAggregations
{
    // Grouped array_agg with an ORDER BY inside the aggregate; reused by several tests.
    private static final String GROUPED_ORDER_BY_AGGREGATION_SQL =
            "SELECT orderpriority, custkey, array_agg(orderstatus ORDER BY orderstatus) FROM orders GROUP BY orderpriority, custkey";

    // Grouped DISTINCT count. The sum() is necessary so that the aggregation isn't
    // optimized into multiple aggregation nodes.
    private static final String GROUPED_DISTINCT_AGGREGATION_SQL =
            "SELECT custkey, sum(custkey), count(DISTINCT orderpriority) FROM orders GROUP BY custkey";

    @Override
    protected QueryRunner createQueryRunner()
            throws Exception
    {
        return TestDistributedSpilledQueries.localCreateQueryRunner();
    }

    /**
     * Builds a session from {@link #getSession()} with the given spill toggle set to
     * {@code "false"} and the revocable memory limit set to {@code "1B"}. The limit is
     * deliberately tiny so that a query which ran with spill anyway would fail.
     */
    private Session spillDisabledSessionWithTinyRevocableMemory(String spillToggleProperty)
    {
        return Session.builder(getSession())
                .setSystemProperty(spillToggleProperty, "false")
                .setSystemProperty(QUERY_MAX_REVOCABLE_MEMORY_PER_NODE, "1B")
                .build();
    }

    @Test
    public void testOrderBySpillingBasic()
    {
        String sql = GROUPED_ORDER_BY_AGGREGATION_SQL + " ORDER BY 1, 2";
        assertQuery(sql);
    }

    @Test
    public void testDoesNotSpillOrderByWhenDisabled()
    {
        Session orderBySpillOff = spillDisabledSessionWithTinyRevocableMemory(ORDER_BY_AGGREGATION_SPILL_ENABLED);
        assertQuery(orderBySpillOff, GROUPED_ORDER_BY_AGGREGATION_SQL);
    }

    @Test
    public void testOrderBySpillingGroupingSets()
    {
        String groupingSetsSql =
                "SELECT orderpriority, custkey, array_agg(orderstatus ORDER BY orderstatus) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5) GROUP BY GROUPING SETS ((), (orderpriority), (orderpriority, custkey))";

        // One branch per grouping set: (), (orderpriority), (orderpriority, custkey)
        String unionSql = String.join(
                " UNION ALL ",
                "SELECT NULL, NULL, array_agg(orderstatus ORDER BY orderstatus) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5)",
                "SELECT orderpriority, NULL, array_agg(orderstatus ORDER BY orderstatus) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5) GROUP BY orderpriority",
                "SELECT orderpriority, custkey, array_agg(orderstatus ORDER BY orderstatus) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5) GROUP BY orderpriority, custkey");

        assertQuery(groupingSetsSql, unionSql);
    }

    @Test
    public void testDistinctSpillingBasic()
    {
        String sql = GROUPED_DISTINCT_AGGREGATION_SQL + " ORDER BY 1";
        assertQuery(sql);
    }

    @Test
    public void testDistinctSpillingBasicWithFilter()
    {
        // the sum() is necessary so that the aggregation isn't optimized into multiple aggregation nodes
        String sql = "SELECT custkey, sum(custkey), count(DISTINCT orderpriority) FILTER(WHERE orderkey > 10000) FROM orders GROUP BY custkey";
        assertQuery(sql);
    }

    @Test
    public void testDistinctSpillingWithoutActualSpill()
    {
        // Exercises the spillable accumulator on an input that is too small to trigger a spill
        String sql = "SELECT custkey, orderdate, count(1), count(DISTINCT orderpriority) FROM orders where orderkey < 10000 and custkey = 1168 group by custkey, orderdate";
        assertQuery(sql);
    }

    // NOTE: "Mash" in the method name is a typo for "Mask"; the name is kept so the test identifier stays stable.
    @Test
    public void testDistinctSpillingWithNonContiguousMashChannel()
    {
        // The maskChannel's position in the page is not contiguous with the aggregateInputChannels
        String sql = "SELECT custkey, orderdate, count(1), count(DISTINCT orderpriority) FILTER(WHERE orderkey > 10000) FROM orders group by custkey, orderdate";
        assertQuery(sql);
    }

    @Test
    public void testDistinctSpillingWithFilterWithoutActualSpill()
    {
        // Spillable accumulator with a maskChannel, on an input that is too small to trigger a spill.
        // Runs the same query shape first with count(DISTINCT ...) and then with max(DISTINCT ...).
        for (String distinctFunction : new String[] {"count", "max"}) {
            assertQuery("SELECT custkey, orderdate, count(1), " + distinctFunction
                    + "(DISTINCT orderpriority) FILTER(WHERE orderkey > 10000) FROM orders WHERE custkey = 1168 group by custkey, orderdate");
        }
    }

    @Test
    public void testDistinctSpillingWithAllDataFilteredOut()
    {
        // FILTER(WHERE false) masks out every row for the DISTINCT aggregate
        String countSql = "SELECT custkey, orderdate, sum(orderkey), count(DISTINCT orderpriority) FILTER(WHERE false ) FROM orders GROUP BY custkey, orderdate";
        assertQuery(countSql);

        // Same shape with the max aggregation function
        String maxSql = "SELECT custkey, orderdate, sum(orderkey), max(DISTINCT orderpriority) FILTER(WHERE false) FROM orders GROUP BY custkey, orderdate";
        assertQuery(maxSql);
    }

    @Test
    public void testDoesNotSpillDistinctWhenDisabled()
    {
        Session distinctSpillOff = spillDisabledSessionWithTinyRevocableMemory(DISTINCT_AGGREGATION_SPILL_ENABLED);
        assertQuery(distinctSpillOff, GROUPED_DISTINCT_AGGREGATION_SQL);
    }

    @Test
    public void testDistinctAndOrderBySpillingBasic()
    {
        String sql = "SELECT custkey, orderpriority, sum(custkey), array_agg(DISTINCT orderpriority ORDER BY orderpriority) FROM orders GROUP BY custkey, orderpriority ORDER BY 1, 2";
        assertQuery(sql);
    }

    @Test
    public void testDistinctAndOrderBySpillingWithDifferentOrderByColumn()
    {
        // NOTE(review): the aggregate orders by a column other than its argument, but the query
        // contains no DISTINCT despite the test name - confirm whether that is intended.
        String sql = "Select custkey, orderpriority, sum(custkey), array_agg(orderkey ORDER BY orderdate) from orders WHERE custkey = 1499 group by custkey, orderpriority";
        assertQuery(sql);
    }

    @Test
    public void testDistinctSpillingCount()
    {
        String sql = "SELECT orderpriority, custkey, sum(custkey), count(DISTINCT totalprice) FROM orders GROUP BY orderpriority, custkey ORDER BY 1, 2";
        assertQuery(sql);
    }

    @Test
    public void testDistinctSpillingGroupingSets()
    {
        String groupingSetsSql =
                "SELECT custkey, count(DISTINCT orderpriority) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5) GROUP BY GROUPING SETS ((), (custkey))";

        // One branch per grouping set: (), (custkey)
        String unionSql = String.join(
                " UNION ALL ",
                "SELECT NULL, count(DISTINCT orderpriority) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5)",
                "SELECT custkey, count(DISTINCT orderpriority) FROM orders WHERE orderkey IN (1, 2, 3, 4, 5) GROUP BY custkey");

        assertQuery(groupingSetsSql, unionSql);
    }

    @Test
    public void testNonGroupedOrderBySpill()
    {
        String sql = "SELECT array_agg(orderstatus ORDER BY orderstatus) FROM orders";
        assertQuery(sql);
    }

    @Test
    public void testMultipleDistinctAggregations()
    {
        String sql = "SELECT custkey, count(DISTINCT orderpriority), count(DISTINCT orderstatus), count(DISTINCT totalprice), count(DISTINCT clerk) FROM orders GROUP BY custkey";
        assertQuery(sql);
    }

    @Test
    public void testDoesNotSpillWhenAggregationSpillDisabled()
    {
        Session.SessionBuilder builder = Session.builder(getSession());
        builder.setSystemProperty(AGGREGATION_SPILL_ENABLED, "false");
        // This will not spill even when distinct/orderBy spill is enabled since aggregation spill is disabled above
        builder.setSystemProperty(ORDER_BY_AGGREGATION_SPILL_ENABLED, "true");
        builder.setSystemProperty(DISTINCT_AGGREGATION_SPILL_ENABLED, "true");
        // set this low so that if we ran with spill the query would fail
        builder.setSystemProperty(QUERY_MAX_REVOCABLE_MEMORY_PER_NODE, "1B");
        Session aggregationSpillOff = builder.build();

        assertQuery(aggregationSpillOff, GROUPED_ORDER_BY_AGGREGATION_SQL);
        assertQuery(aggregationSpillOff, GROUPED_DISTINCT_AGGREGATION_SQL);
    }
}