StreamingAggregationOperator.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.operator;

import com.facebook.presto.common.Page;
import com.facebook.presto.common.PageBuilder;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.memory.context.LocalMemoryContext;
import com.facebook.presto.operator.aggregation.AccumulatorFactory;
import com.facebook.presto.spi.plan.AggregationNode.Step;
import com.facebook.presto.spi.plan.PlanNodeId;
import com.facebook.presto.sql.gen.JoinCompiler;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;

public class StreamingAggregationOperator
        implements Operator
{
    /**
     * Factory producing {@link StreamingAggregationOperator} instances that all share the
     * configuration captured at construction time.
     */
    public static class StreamingAggregationOperatorFactory
            implements OperatorFactory
    {
        private final int operatorId;
        private final PlanNodeId planNodeId;
        private final List<Type> sourceTypes;
        private final List<Type> groupByTypes;
        private final List<Integer> groupByChannels;
        private final Step step;
        private final List<AccumulatorFactory> accumulatorFactories;
        private final JoinCompiler joinCompiler;
        // Set by noMoreOperators(); createOperator() refuses to run afterwards.
        private boolean closed;

        /**
         * Captures the configuration handed to every operator created by this factory.
         * List arguments are defensively copied into immutable lists.
         *
         * @throws NullPointerException if any reference argument is null
         */
        public StreamingAggregationOperatorFactory(int operatorId, PlanNodeId planNodeId, List<Type> sourceTypes, List<Type> groupByTypes, List<Integer> groupByChannels, Step step, List<AccumulatorFactory> accumulatorFactories, JoinCompiler joinCompiler)
        {
            this.operatorId = operatorId;
            this.planNodeId = requireNonNull(planNodeId, "planNodeId is null");
            this.sourceTypes = ImmutableList.copyOf(requireNonNull(sourceTypes, "sourceTypes is null"));
            this.groupByTypes = ImmutableList.copyOf(requireNonNull(groupByTypes, "groupByTypes is null"));
            this.groupByChannels = ImmutableList.copyOf(requireNonNull(groupByChannels, "groupByChannels is null"));
            // The operator constructor rejects a null step, so fail here, where the bad value
            // is supplied, rather than later inside createOperator().
            this.step = requireNonNull(step, "step is null");
            this.accumulatorFactories = ImmutableList.copyOf(requireNonNull(accumulatorFactories, "accumulatorFactories is null"));
            this.joinCompiler = requireNonNull(joinCompiler, "joinCompiler is null");
        }

        /**
         * Registers an operator context with the driver and builds a new operator on top of it.
         *
         * @throws IllegalStateException if {@link #noMoreOperators()} has already been called
         */
        @Override
        public Operator createOperator(DriverContext driverContext)
        {
            checkState(!closed, "Factory is already closed");
            OperatorContext operatorContext = driverContext.addOperatorContext(operatorId, planNodeId, StreamingAggregationOperator.class.getSimpleName());
            return new StreamingAggregationOperator(operatorContext, sourceTypes, groupByTypes, groupByChannels, step, accumulatorFactories, joinCompiler);
        }

        /**
         * Marks this factory as closed; subsequent {@link #createOperator} calls fail.
         */
        @Override
        public void noMoreOperators()
        {
            closed = true;
        }

        /**
         * Returns a new, open factory with the same configuration as this one.
         */
        @Override
        public OperatorFactory duplicate()
        {
            return new StreamingAggregationOperatorFactory(operatorId, planNodeId, sourceTypes, groupByTypes, groupByChannels, step, accumulatorFactories, joinCompiler);
        }
    }

    // Context of this operator; source of the two memory contexts below and of the
    // "waiting for memory" future consulted by updateMemoryUsage().
    private final OperatorContext operatorContext;
    // Receives the retained-size estimate when the aggregation step produces partial output.
    private final LocalMemoryContext systemMemoryContext;
    // Receives the retained-size estimate when the aggregation step does not produce partial output.
    private final LocalMemoryContext userMemoryContext;
    // Types of the group-by columns; these are the leading columns of every output page.
    private final List<Type> groupByTypes;
    // Indexes of the group-by columns within an input page.
    private final int[] groupByChannels;
    // Used to build a fresh set of aggregators each time a group is flushed.
    private final List<AccumulatorFactory> accumulatorFactories;
    private final Step step;
    // Compares rows on the group-by columns to detect group boundaries.
    private final PagesHashStrategy pagesHashStrategy;

    // Aggregators for the group currently being accumulated; replaced after every flush.
    private List<Aggregator> aggregates;
    // Collects one output row per finished group until a full page can be emitted.
    private final PageBuilder pageBuilder;
    // Completed output pages waiting to be handed out by getOutput().
    private final Deque<Page> outputPages = new LinkedList<>();
    // Single-row region holding the last row of the most recent input page; its group is
    // still open because the next page may continue it. Null when no group is pending.
    private Page currentGroup;
    // Set once finish() has been called.
    private boolean finishing;

    /**
     * Creates a streaming aggregation operator.
     *
     * @param operatorContext context supplying the memory contexts used for accounting
     * @param sourceTypes types used to compile the row-equality strategy for input pages
     * @param groupByTypes types of the group-by columns, in output order
     * @param groupByChannels indexes of the group-by columns within an input page
     * @param step aggregation step; also decides which memory context is charged
     * @param accumulatorFactories one factory per aggregate; reused to create fresh aggregators for every group
     * @param joinCompiler compiler used to obtain the {@link PagesHashStrategy}
     * @throws NullPointerException if any argument is null
     */
    public StreamingAggregationOperator(OperatorContext operatorContext, List<Type> sourceTypes, List<Type> groupByTypes, List<Integer> groupByChannels, Step step, List<AccumulatorFactory> accumulatorFactories, JoinCompiler joinCompiler)
    {
        this.operatorContext = requireNonNull(operatorContext, "operatorContext is null");
        this.systemMemoryContext = operatorContext.localSystemMemoryContext();
        this.userMemoryContext = operatorContext.localUserMemoryContext();
        this.groupByTypes = ImmutableList.copyOf(requireNonNull(groupByTypes, "groupByTypes is null"));
        this.groupByChannels = Ints.toArray(requireNonNull(groupByChannels, "groupByChannels is null"));
        // Defensive copy: this list is consulted again on every group flush, and the output
        // layout of pageBuilder is fixed from it here, so it must not change underneath us.
        this.accumulatorFactories = ImmutableList.copyOf(requireNonNull(accumulatorFactories, "accumulatorFactories is null"));
        this.step = requireNonNull(step, "step is null");
        // Validate the remaining arguments before any aggregator is created.
        requireNonNull(sourceTypes, "sourceTypes is null");
        requireNonNull(joinCompiler, "joinCompiler is null");

        this.aggregates = setupAggregates(this.step, this.accumulatorFactories);
        this.pageBuilder = new PageBuilder(toTypes(this.groupByTypes, aggregates));

        // The strategy is created over empty channel lists; within this class it is only used
        // for its page-to-page rowEqualsRow comparison.
        pagesHashStrategy = joinCompiler.compilePagesHashStrategyFactory(sourceTypes, groupByChannels, Optional.empty())
                .createPagesHashStrategy(
                        sourceTypes.stream()
                                .map(type -> ImmutableList.<Block>of())
                                .collect(toImmutableList()), OptionalInt.empty());
    }

    /**
     * Builds one new {@link Aggregator} per accumulator factory, each wired to report memory
     * changes through {@link #updateMemoryUsage()}.
     */
    private List<Aggregator> setupAggregates(Step step, List<AccumulatorFactory> accumulatorFactories)
    {
        return accumulatorFactories.stream()
                .map(factory -> new Aggregator(factory, step, this::updateMemoryUsage))
                .collect(toImmutableList());
    }

    /**
     * Returns the output column types: the group-by types followed by the type of each aggregator.
     */
    private static List<Type> toTypes(List<Type> groupByTypes, List<Aggregator> aggregates)
    {
        ImmutableList.Builder<Type> outputTypes = ImmutableList.builder();
        outputTypes.addAll(groupByTypes);
        for (Aggregator aggregate : aggregates) {
            outputTypes.add(aggregate.getType());
        }
        return outputTypes.build();
    }

    /**
     * Returns the operator context supplied at construction time.
     */
    @Override
    public OperatorContext getOperatorContext()
    {
        return operatorContext;
    }

    /**
     * Input is accepted only while the operator has not been asked to finish and no
     * output pages are queued.
     */
    @Override
    public boolean needsInput()
    {
        if (finishing) {
            return false;
        }
        return outputPages.isEmpty();
    }

    /**
     * Feeds one page into the aggregation and then refreshes the memory accounting.
     *
     * @throws IllegalStateException if {@link #finish()} has already been called
     * @throws NullPointerException if {@code page} is null
     */
    @Override
    public void addInput(Page page)
    {
        checkState(!finishing, "Operator is already finishing");
        processInput(requireNonNull(page, "page is null"));
        updateMemoryUsage();
    }

    /**
     * Recomputes the operator's retained size (queued output pages, aggregators, page builder
     * and the pending group row) and reports it to the memory context matching the step.
     *
     * @return whether the operator context's waiting-for-memory future is already done
     */
    private boolean updateMemoryUsage()
    {
        long retainedBytes = 0;
        for (Page queuedPage : outputPages) {
            retainedBytes += queuedPage.getRetainedSizeInBytes();
        }
        for (Aggregator aggregate : aggregates) {
            retainedBytes += aggregate.getEstimatedSize();
        }
        retainedBytes += pageBuilder.getRetainedSizeInBytes();
        retainedBytes += (currentGroup == null) ? 0 : currentGroup.getRetainedSizeInBytes();

        // Partial output is charged to the system context, everything else to the user context.
        LocalMemoryContext targetContext = step.isOutputPartial() ? systemMemoryContext : userMemoryContext;
        targetContext.setBytes(retainedBytes);

        // If memory is not available, inform the caller that we cannot proceed for allocation.
        return operatorContext.isWaitingForMemory().isDone();
    }

    /**
     * Consumes one input page. A group is a maximal run of consecutive rows that are equal on
     * the group-by channels. Every group that ends inside this page is evaluated and appended
     * to the output; the last group of the page stays open (remembered via {@code currentGroup})
     * because the next page may continue it.
     *
     * <p>A page with zero positions is ignored and leaves all state untouched.
     *
     * @throws NullPointerException if {@code page} is null
     */
    private void processInput(Page page)
    {
        requireNonNull(page, "page is null");

        // An empty page has no first row to compare against the pending group and no last row
        // to remember; without this guard the getRegion(positionCount - 1, 1) call below would
        // be asked for a region starting at position -1.
        if (page.getPositionCount() == 0) {
            return;
        }

        Page groupByPage = page.extractChannels(groupByChannels);
        if (currentGroup != null) {
            if (!pagesHashStrategy.rowEqualsRow(0, currentGroup.extractChannels(groupByChannels), 0, groupByPage)) {
                // page starts with new group, so flush the one carried over from the previous page
                evaluateAndFlushGroup(currentGroup, 0);
            }
            currentGroup = null;
        }

        int startPosition = 0;
        while (true) {
            // may be equal to page.getPositionCount() if the end is not found in this page
            int nextGroupStart = findNextGroupStart(startPosition, groupByPage);
            addRowsToAggregates(page, startPosition, nextGroupStart - 1);

            if (nextGroupStart >= page.getPositionCount()) {
                // group runs to the end of the page: keep its last row so the keys can be
                // compared with the next page, or emitted by finish()
                currentGroup = page.getRegion(page.getPositionCount() - 1, 1);
                return;
            }

            // current group stops somewhere in the middle of the page, so flush it
            evaluateAndFlushGroup(page, startPosition);
            startPosition = nextGroupStart;
        }
    }

    /**
     * Feeds rows {@code startPosition..endPosition} (both inclusive) of the page to every aggregator.
     */
    private void addRowsToAggregates(Page page, int startPosition, int endPosition)
    {
        int rowCount = endPosition - startPosition + 1;
        Page groupRows = page.getRegion(startPosition, rowCount);
        for (Aggregator aggregate : aggregates) {
            aggregate.processPage(groupRows);
        }
    }

    /**
     * Appends one output row for the finished group: the group-by values read from
     * {@code page} at {@code position}, followed by the result of each aggregator. A full
     * page builder is moved to the output queue, and the aggregators are replaced with
     * fresh ones for the next group.
     */
    private void evaluateAndFlushGroup(Page page, int position)
    {
        pageBuilder.declarePosition();

        // Group-by key values occupy the leading output channels.
        int keyCount = groupByTypes.size();
        for (int key = 0; key < keyCount; key++) {
            Block keyBlock = page.getBlock(groupByChannels[key]);
            Type keyType = groupByTypes.get(key);
            keyType.appendTo(keyBlock, position, pageBuilder.getBlockBuilder(key));
        }

        // Aggregation results follow the keys, one channel per aggregator.
        int outputChannel = keyCount;
        for (Aggregator aggregate : aggregates) {
            aggregate.evaluate(pageBuilder.getBlockBuilder(outputChannel));
            outputChannel++;
        }

        if (pageBuilder.isFull()) {
            outputPages.add(pageBuilder.build());
            pageBuilder.reset();
        }

        // Start the next group with new aggregators.
        aggregates = setupAggregates(step, accumulatorFactories);
    }

    /**
     * Returns the first position after {@code startPosition} whose row differs from the row at
     * {@code startPosition}, or the page's position count if no such row exists.
     */
    private int findNextGroupStart(int startPosition, Page page)
    {
        int positionCount = page.getPositionCount();
        int candidate = startPosition + 1;
        while (candidate < positionCount) {
            if (!pagesHashStrategy.rowEqualsRow(startPosition, page, candidate, page)) {
                return candidate;
            }
            candidate++;
        }
        return positionCount;
    }

    /**
     * Returns the oldest queued output page, or {@code null} when none is available.
     */
    @Override
    public Page getOutput()
    {
        // pollFirst() yields null on an empty deque; queued pages themselves are never null.
        return outputPages.pollFirst();
    }

    /**
     * Signals the end of input: the still-open group (if any) is evaluated, and whatever the
     * page builder holds is queued as the final output page.
     */
    @Override
    public void finish()
    {
        finishing = true;

        // Close the group left open by the last input page.
        if (currentGroup != null) {
            evaluateAndFlushGroup(currentGroup, 0);
            currentGroup = null;
        }

        if (pageBuilder.isEmpty()) {
            return;
        }
        outputPages.add(pageBuilder.build());
        pageBuilder.reset();
    }

    /**
     * The operator is finished once finish() has been called and nothing remains to emit:
     * no queued pages, no pending group and an empty page builder.
     */
    @Override
    public boolean isFinished()
    {
        if (!finishing) {
            return false;
        }
        return outputPages.isEmpty() && currentGroup == null && pageBuilder.isEmpty();
    }
}