// RddAndMore.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.spark;

import com.facebook.presto.spark.classloader_interface.MutablePartitionId;
import com.facebook.presto.spark.classloader_interface.PrestoSparkTaskOutput;
import com.facebook.presto.spi.plan.PartitioningHandle;
import com.google.common.collect.ImmutableList;
import org.apache.spark.Dependency;
import org.apache.spark.ShuffleDependency;
import org.apache.spark.SparkException;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.rdd.RDD;
import scala.Tuple2;
import scala.collection.JavaConverters;

import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;

import static com.facebook.presto.spark.util.PrestoSparkUtils.getActionResultWithTimeout;
import static com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION;
import static com.google.common.base.Preconditions.checkState;
import static java.util.Objects.requireNonNull;

/**
 * Bundles a Spark pair RDD with the broadcast dependencies that accompany it and an
 * optional {@link PartitioningHandle}.
 *
 * <p>The RDD can be collected through this object at most once, see
 * {@link #collectAndDestroyDependenciesWithTimeout}. The flag guarding this is a plain,
 * unsynchronized field.
 *
 * @param <T> type of the task output carried as the RDD's values
 */
public class RddAndMore<T extends PrestoSparkTaskOutput>
{
    private final JavaPairRDD<MutablePartitionId, T> rdd;
    private final List<PrestoSparkBroadcastDependency<?>> broadcastDependencies;
    private final Optional<PartitioningHandle> partitioningHandle;
    // Set on the first collect attempt, whether or not that attempt succeeds
    private boolean collected;

    /**
     * Creates an instance without a partitioning handle.
     *
     * @param rdd the RDD to wrap; must not be null
     * @param broadcastDependencies broadcast dependencies associated with the RDD; must not be null
     */
    public RddAndMore(
            JavaPairRDD<MutablePartitionId, T> rdd,
            List<PrestoSparkBroadcastDependency<?>> broadcastDependencies)
    {
        this(rdd, broadcastDependencies, Optional.empty());
    }

    /**
     * Creates an instance. The list of broadcast dependencies is copied into an immutable list.
     *
     * @param rdd the RDD to wrap; must not be null
     * @param broadcastDependencies broadcast dependencies associated with the RDD; must not be null
     * @param partitioningHandle partitioning handle associated with the RDD, if any; must not be null
     */
    public RddAndMore(
            JavaPairRDD<MutablePartitionId, T> rdd,
            List<PrestoSparkBroadcastDependency<?>> broadcastDependencies,
            Optional<PartitioningHandle> partitioningHandle)
    {
        this.rdd = requireNonNull(rdd, "rdd is null");
        this.broadcastDependencies = ImmutableList.copyOf(requireNonNull(broadcastDependencies, "broadcastDependencies is null"));
        this.partitioningHandle = requireNonNull(partitioningHandle, "partitioningHandle is null");
    }

    /**
     * Collects the RDD asynchronously, waits for the result using
     * {@code getActionResultWithTimeout}, and destroys every broadcast dependency afterwards.
     *
     * <p>The dependencies are destroyed whether the collection succeeds or fails. A second
     * call is rejected by the {@code collected} guard, so this is the only point at which
     * this object can release them.
     *
     * @param timeout timeout value forwarded to {@code getActionResultWithTimeout}
     * @param timeUnit unit of {@code timeout}
     * @param waitTimeMetrics wait time metrics forwarded to {@code getActionResultWithTimeout}
     * @return the collected key/value pairs
     * @throws IllegalStateException if a collection has already been attempted on this instance
     * @throws SparkException propagated from waiting on the collect action
     * @throws TimeoutException propagated from waiting on the collect action
     */
    public List<Tuple2<MutablePartitionId, T>> collectAndDestroyDependenciesWithTimeout(long timeout, TimeUnit timeUnit, Set<PrestoSparkServiceWaitTimeMetrics> waitTimeMetrics)
            throws SparkException, TimeoutException
    {
        checkState(!collected, "already collected");
        collected = true;

        List<Tuple2<MutablePartitionId, T>> result;
        try {
            result = getActionResultWithTimeout(rdd.collectAsync(), timeout, timeUnit, waitTimeMetrics);
        }
        catch (Throwable failure) {
            // Release the broadcasts on failure as well; otherwise they would never be destroyed by this object.
            // The variable is not reassigned, so the rethrow below keeps the precise exception types of the try block.
            destroyBroadcastDependenciesAfterFailure(failure);
            throw failure;
        }

        broadcastDependencies.forEach(PrestoSparkBroadcastDependency::destroy);
        return result;
    }

    // Destroys every broadcast dependency, recording cleanup errors as suppressed on the primary failure
    private void destroyBroadcastDependenciesAfterFailure(Throwable failure)
    {
        for (PrestoSparkBroadcastDependency<?> dependency : broadcastDependencies) {
            try {
                dependency.destroy();
            }
            catch (RuntimeException cleanupFailure) {
                // Keep the original failure as the one reported to the caller
                if (cleanupFailure != failure) {
                    failure.addSuppressed(cleanupFailure);
                }
            }
        }
    }

    /**
     * @return the wrapped RDD
     */
    public JavaPairRDD<MutablePartitionId, T> getRdd()
    {
        return rdd;
    }

    /**
     * @return an immutable list of the broadcast dependencies supplied at construction
     */
    public List<PrestoSparkBroadcastDependency<?>> getBroadcastDependencies()
    {
        return broadcastDependencies;
    }

    /**
     * Returns the shuffle dependencies of the underlying RDD, i.e. those entries of the
     * underlying RDD's {@code getDependencies()} that are instances of {@link ShuffleDependency}.
     *
     * @return the shuffle dependencies, in the order reported by the underlying RDD
     */
    public List<ShuffleDependency> getShuffleDependencies()
    {
        RDD underlyingRdd = getRdd().rdd();
        Collection<Dependency> dependencies = JavaConverters.asJavaCollectionConverter(underlyingRdd.getDependencies()).asJavaCollection();
        return dependencies.stream()
                .filter(ShuffleDependency.class::isInstance)
                .map(ShuffleDependency.class::cast)
                .collect(Collectors.toList());
    }

    /**
     * @return the partitioning handle supplied at construction, or empty if none was given
     */
    public Optional<PartitioningHandle> getPartitioningHandle()
    {
        return partitioningHandle;
    }

    /**
     * @return {@code true} if a partitioning handle is present and equals
     *         {@code FIXED_BROADCAST_DISTRIBUTION}; {@code false} otherwise
     */
    public boolean isBroadcastDistribution()
    {
        return getPartitioningHandle()
                .filter(handle -> handle.equals(FIXED_BROADCAST_DISTRIBUTION))
                .isPresent();
    }
}