HudiRealtimeSplitConverter.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.util;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import static com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY;
import static java.util.Objects.requireNonNull;

/**
 * HoodieRealtimeFileSplit specific implementation of CustomSplitConverter.
 * Extracts customSplitInfo from HoodieRealtimeFileSplit and reconstructs HoodieRealtimeFileSplit from Map.
 */
public class HudiRealtimeSplitConverter
        implements CustomSplitConverter
{
    private static final Splitter SPLITTER = Splitter.on(",").omitEmptyStrings();

    public static final String HUDI_DELTA_FILEPATHS_KEY = "hudi_delta_filepaths";
    public static final String HUDI_BASEPATH_KEY = "hudi_basepath";
    public static final String HUDI_MAX_COMMIT_TIME_KEY = "hudi_max_commit_time";

    @Override
    public Optional<Map<String, String>> extractCustomSplitInfo(FileSplit split)
    {
        if (split instanceof HoodieRealtimeFileSplit) {
            HoodieRealtimeFileSplit hudiSplit = (HoodieRealtimeFileSplit) split;
            Map<String, String> customSplitInfo = ImmutableMap.<String, String>builder()
                    .put(CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeFileSplit.class.getName())
                    .put(HUDI_DELTA_FILEPATHS_KEY, String.join(",", hudiSplit.getDeltaLogPaths()))
                    .put(HUDI_BASEPATH_KEY, hudiSplit.getBasePath())
                    .put(HUDI_MAX_COMMIT_TIME_KEY, hudiSplit.getMaxCommitTime())
                    .build();
            return Optional.of(customSplitInfo);
        }
        return Optional.empty();
    }

    @Override
    public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo)
            throws IOException
    {
        String customSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
        if (HoodieRealtimeFileSplit.class.getName().equals(customSplitClass)) {
            requireNonNull(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY), "HUDI_DELTA_FILEPATHS_KEY is missing");
            List<String> deltaLogPaths = SPLITTER.splitToList(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY));
            List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
            return Optional.of(new HoodieRealtimeFileSplit(
                    split,
                    requireNonNull(customSplitInfo.get(HUDI_BASEPATH_KEY), "HUDI_BASEPATH_KEY is missing"),
                    deltaLogFiles,
                    requireNonNull(customSplitInfo.get(HUDI_MAX_COMMIT_TIME_KEY), "HUDI_MAX_COMMIT_TIME_KEY is missing"),
                    // false as incremental query is not supported yet
                    false,
                    Option.empty()));
        }
        return Optional.empty();
    }
}