MapColumnStatisticsBuilder.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc.metadata.statistics;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.orc.proto.DwrfProto;
import com.google.common.collect.ImmutableList;
import it.unimi.dsi.fastutil.objects.Object2LongMap;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

/**
 * Statistics builder for map columns. It accumulates a non-null value count, a raw size,
 * a storage size and, when key statistics collection is enabled, one
 * {@link MapStatisticsEntry} per reported key.
 */
public class MapColumnStatisticsBuilder
        implements StatisticsBuilder
{
    private final ImmutableList.Builder<MapStatisticsEntry> entries = ImmutableList.builder();
    private final boolean collectKeyStats;

    private long nonNullValueCount;
    private long storageSize;
    private long rawSize;
    // set as soon as any key statistics are reported, even when the entries themselves are not retained
    private boolean hasEntries;

    /**
     * @param collectKeyStats - if true the builder will collect key entries and produce MapColumnStatistics,
     * if false the builder won't collect key entries and will produce generic ColumnStatistics
     */
    public MapColumnStatisticsBuilder(boolean collectKeyStats)
    {
        this.collectKeyStats = collectKeyStats;
    }

    /**
     * Not supported by this builder; always throws.
     *
     * @throws UnsupportedOperationException on every call
     */
    @Override
    public void addBlock(Type type, Block block)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Records the statistics for one map key. The entry is retained only when key statistics
     * collection is enabled, but the builder always remembers that at least one entry was reported.
     * <p>
     * Note: MapColumnStatisticsBuilder doesn't check the uniqueness of the keys
     *
     * @param key the map key the statistics belong to, must not be null
     * @param columnStatistics the statistics for that key, must not be null
     */
    public void addMapStatistics(DwrfProto.KeyInfo key, ColumnStatistics columnStatistics)
    {
        requireNonNull(key, "key is null");
        requireNonNull(columnStatistics, "columnStatistics is null");
        hasEntries = true;
        if (!collectKeyStats) {
            return;
        }
        entries.add(new MapStatisticsEntry(key, columnStatistics));
    }

    /**
     * Adds {@code count} to the running non-null value count.
     *
     * @param count number of values to add, must not be negative
     * @throws IllegalArgumentException if {@code count} is negative
     */
    public void increaseValueCount(long count)
    {
        checkArgument(count >= 0, "count is negative");
        nonNullValueCount = nonNullValueCount + count;
    }

    /**
     * Builds the map statistics from the collected entries.
     *
     * @return the map statistics, or empty when key statistics collection is disabled
     * or no entry has been reported
     */
    private Optional<MapStatistics> buildMapStatistics()
    {
        if (!hasEntries || !collectKeyStats) {
            return Optional.empty();
        }
        return Optional.of(new MapStatistics(entries.build()));
    }

    /**
     * Builds the column statistics from the accumulated counters.
     *
     * @return a {@link MapColumnStatistics} when map statistics are available,
     * otherwise a generic {@link ColumnStatistics}
     */
    @Override
    public ColumnStatistics buildColumnStatistics()
    {
        Optional<MapStatistics> mapStatistics = buildMapStatistics();
        if (!mapStatistics.isPresent()) {
            return new ColumnStatistics(nonNullValueCount, null, rawSize, storageSize);
        }
        return new MapColumnStatistics(nonNullValueCount, null, rawSize, storageSize, mapStatistics.get());
    }

    /**
     * Adds the given amount to the accumulated raw size.
     */
    @Override
    public void incrementRawSize(long rawSize)
    {
        this.rawSize = this.rawSize + rawSize;
    }

    /**
     * Adds the given amount to the accumulated storage size.
     */
    @Override
    public void incrementSize(long storageSize)
    {
        this.storageSize = this.storageSize + storageSize;
    }

    /**
     * Merges the per-key statistics found in the given column statistics, combining all
     * statistics reported for the same key into a single entry. Keys keep the order in
     * which they are first encountered.
     *
     * @param stats the column statistics to merge; elements whose number of values is not
     * positive are skipped
     * @param keySizes optional per-key sizes; when null, a null size is passed to the merge of each key
     * @return the merged map statistics, or empty when an element with a positive number of
     * values has no map statistics, or when no key entry was found at all
     */
    public static Optional<MapStatistics> mergeMapStatistics(List<ColumnStatistics> stats, Object2LongMap<DwrfProto.KeyInfo> keySizes)
    {
        // LinkedHashMap keeps the keys in first-seen order
        Map<DwrfProto.KeyInfo, List<ColumnStatistics>> statsPerKey = new LinkedHashMap<>();
        long totalValueCount = 0;

        for (ColumnStatistics partial : stats) {
            long partialValueCount = partial.getNumberOfValues();
            if (partialValueCount <= 0) {
                continue;
            }

            MapStatistics partialMapStatistics = partial.getMapStatistics();
            if (partialMapStatistics == null) {
                // there are non-null values but no statistics, so we can not say anything about the data
                return Optional.empty();
            }

            // group the per-key column stats so each key can be merged in one go below
            for (MapStatisticsEntry mapEntry : partialMapStatistics.getEntries()) {
                statsPerKey.computeIfAbsent(mapEntry.getKey(), ignored -> new ArrayList<>())
                        .add(mapEntry.getColumnStatistics());
            }
            totalValueCount += partialValueCount;
        }

        // produce a single merged entry for every distinct key
        MapColumnStatisticsBuilder mergedBuilder = new MapColumnStatisticsBuilder(true);
        for (Map.Entry<DwrfProto.KeyInfo, List<ColumnStatistics>> keyedStats : statsPerKey.entrySet()) {
            DwrfProto.KeyInfo mapKey = keyedStats.getKey();
            Long keySize = null;
            if (keySizes != null) {
                keySize = keySizes.getLong(mapKey);
            }
            mergedBuilder.addMapStatistics(mapKey, mergeColumnStatistics(keyedStats.getValue(), keySize, null));
        }
        // the count is recorded on the builder, although buildMapStatistics() itself does not read it
        mergedBuilder.increaseValueCount(totalValueCount);

        return mergedBuilder.buildMapStatistics();
    }
}