SortingFileWriterFactory.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.common.block.SortOrder;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PageSorter;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

import java.io.IOException;
import java.util.List;
import java.util.Optional;

import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR;
import static com.google.common.base.Preconditions.checkState;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class SortingFileWriterFactory
{
    private final HdfsEnvironment hdfsEnvironment;
    private final ConnectorSession session;
    private final JobConf conf;
    private final List<Type> types;
    private final List<Integer> sortFields;
    private final List<SortOrder> sortOrders;
    private final DataSize sortBufferSize;
    private final int maxOpenSortFiles;
    private final PageSorter pageSorter;
    private final OrcFileWriterFactory orcFileWriterFactory;
    private final boolean sortedWriteToTempPathEnabled;
    private final int sortedWriteTempFileSubdirectoryCount;

    public SortingFileWriterFactory(
            HdfsEnvironment hdfsEnvironment,
            ConnectorSession session,
            JobConf conf,
            List<Type> types,
            List<Integer> sortFields,
            List<SortOrder> sortOrders,
            DataSize sortBufferSize,
            int maxOpenSortFiles,
            PageSorter pageSorter,
            OrcFileWriterFactory orcFileWriterFactory,
            boolean sortedWriteToTempPathEnabled,
            int sortedWriteTempFileSubdirectoryCount)
    {
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.session = requireNonNull(session, "session is null");
        this.conf = requireNonNull(conf, "conf is null");
        this.types = ImmutableList.copyOf(requireNonNull(types, "types is null"));
        this.sortFields = ImmutableList.copyOf(requireNonNull(sortFields, "sortFields is null"));
        this.sortOrders = ImmutableList.copyOf(requireNonNull(sortOrders, "sortOrders is null"));
        this.sortBufferSize = requireNonNull(sortBufferSize, "sortBufferSize is null");
        this.maxOpenSortFiles = maxOpenSortFiles;
        this.pageSorter = requireNonNull(pageSorter, "pageSorter is null");
        this.orcFileWriterFactory = requireNonNull(orcFileWriterFactory, "orcFileWriterFactory is null");
        this.sortedWriteToTempPathEnabled = sortedWriteToTempPathEnabled;
        this.sortedWriteTempFileSubdirectoryCount = sortedWriteTempFileSubdirectoryCount;
    }

    public SortingFileWriter createSortingFileWriter(Path path, HiveFileWriter outputWriter, int fileNumber, Optional<Path> tempPath)
    {
        checkState(tempPath.isPresent() == sortedWriteToTempPathEnabled, "tempPath existence is not consistent with sortedWriteToTempPathEnabled config");

        FileSystem fileSystem;
        try {
            fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_OPEN_ERROR, e);
        }

        Path prefix = sortedWriteToTempPathEnabled
                ? new Path(tempPath.get(), format(".tmp-sort-%s/.tmp-sort-%s", fileNumber % sortedWriteTempFileSubdirectoryCount, path.getName()))
                : new Path(path.getParent(), ".tmp-sort." + path.getName());
        return new SortingFileWriter(
                fileSystem,
                prefix,
                outputWriter,
                sortBufferSize,
                maxOpenSortFiles,
                types,
                sortFields,
                sortOrders,
                pageSorter,
                (fs, p) -> orcFileWriterFactory.createDataSink(session, fs, p),
                sortedWriteToTempPathEnabled);
    }
}