StringStatisticsBuilder.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;
import com.facebook.presto.common.block.Block;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import java.util.List;
import java.util.Optional;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static java.lang.Math.addExact;
import static java.util.Objects.requireNonNull;
/**
 * Accumulates statistics for a column of slice (string) values.
 * <p>
 * The builder tracks the number of non-null values, the smallest and largest
 * value seen, the running sum of value lengths, and separately maintained
 * raw-size and storage-size counters. A minimum or maximum that is longer than
 * the configured limit is dropped (reported as {@code null}) when the
 * statistics are built.
 */
public class StringStatisticsBuilder
        implements SliceColumnStatisticsBuilder
{
    // Minimum/maximum values whose length exceeds this limit are dropped at build time.
    private final int stringStatisticsLimitInBytes;

    private long nonNullValueCount;
    private long storageSize;
    private long rawSize;
    // Either bound may be null once values have been added: it means the bound is unknown/dropped.
    private Slice minimum;
    private Slice maximum;
    // Running total of the lengths of all added values.
    private long sum;

    /**
     * Creates an empty builder.
     *
     * @param stringStatisticsLimitInBytes maximum length a minimum/maximum value may have
     *        to be kept in the built statistics
     */
    public StringStatisticsBuilder(int stringStatisticsLimitInBytes)
    {
        this(stringStatisticsLimitInBytes, 0, null, null, 0);
    }

    private StringStatisticsBuilder(int stringStatisticsLimitInBytes, long nonNullValueCount, Slice minimum, Slice maximum, long sum)
    {
        this.stringStatisticsLimitInBytes = stringStatisticsLimitInBytes;
        this.nonNullValueCount = nonNullValueCount;
        this.minimum = minimum;
        this.maximum = maximum;
        this.sum = sum;
    }

    /**
     * Returns a new builder that carries over everything accumulated so far but
     * applies a different min/max length limit. This builder is not modified.
     *
     * @param limitInBytes the new limit; must not be negative
     * @return a copy of this builder using {@code limitInBytes}
     * @throws IllegalArgumentException if {@code limitInBytes} is negative
     */
    public StringStatisticsBuilder withStringStatisticsLimit(int limitInBytes)
    {
        checkArgument(limitInBytes >= 0, "limitInBytes is less than 0");
        StringStatisticsBuilder copy = new StringStatisticsBuilder(limitInBytes, nonNullValueCount, minimum, maximum, sum);
        // The size counters are part of the accumulated state as well; without copying them
        // the new builder would report zero raw/storage size for the values already counted.
        copy.rawSize = rawSize;
        copy.storageSize = storageSize;
        return copy;
    }

    /**
     * @return the number of values added so far, including counts taken over from merged statistics
     */
    public long getNonNullValueCount()
    {
        return nonNullValueCount;
    }

    /**
     * Adds the value stored at {@code position} of {@code block}.
     * <p>
     * The value is always counted as non-null; this method performs no null check
     * on the position itself.
     *
     * @param block the block to read the value from; must not be null
     * @param position the position of the value inside {@code block}
     * @throws NullPointerException if {@code block} is null
     * @throws ArithmeticException if the running sum of lengths overflows a {@code long}
     */
    @Override
    public void addValue(Block block, int position)
    {
        requireNonNull(block, "block is null");

        int sliceLength = block.getSliceLength(position);
        if (nonNullValueCount == 0) {
            // First value: it is both the minimum and the maximum.
            checkState(minimum == null && maximum == null);
            Slice minMaxSlice = block.getSlice(position, 0, sliceLength);
            minimum = minMaxSlice;
            maximum = minMaxSlice;
        }
        // A null bound means it is unknown (e.g. dropped earlier), so it is left untouched.
        else if (minimum != null && block.bytesCompare(position, 0, sliceLength, minimum, 0, minimum.length()) <= 0) {
            minimum = block.getSlice(position, 0, sliceLength);
        }
        else if (maximum != null && block.bytesCompare(position, 0, sliceLength, maximum, 0, maximum.length()) >= 0) {
            maximum = block.getSlice(position, 0, sliceLength);
        }

        nonNullValueCount++;
        sum = addExact(sum, sliceLength);
    }

    /**
     * This method can only be used in merging stats.
     * It assumes min or max could be nulls.
     * <p>
     * A null bound, either already held by this builder or supplied by {@code value},
     * makes the merged bound null.
     *
     * @param valueCount number of values described by {@code value}; must be positive
     * @param value the partial statistics to merge; at least one of its min/max must be non-null
     */
    private void addStringStatistics(long valueCount, StringStatistics value)
    {
        requireNonNull(value, "value is null");
        checkArgument(valueCount > 0, "valueCount is 0");
        checkArgument(value.getMin() != null || value.getMax() != null, "min and max cannot both be null");

        if (nonNullValueCount == 0) {
            checkState(minimum == null && maximum == null);
            minimum = value.getMin();
            maximum = value.getMax();
        }
        else {
            // Take the incoming bound when it is unknown (null) or more extreme than the current one.
            if (minimum != null && (value.getMin() == null || minimum.compareTo(value.getMin()) > 0)) {
                minimum = value.getMin();
            }
            if (maximum != null && (value.getMax() == null || maximum.compareTo(value.getMax()) < 0)) {
                maximum = value.getMax();
            }
        }

        nonNullValueCount += valueCount;
        sum = addExact(sum, value.getSum());
    }

    /**
     * Builds the string statistics for the values added so far.
     * <p>
     * Note that this updates the builder's own minimum and maximum: a bound that
     * exceeds the limit is replaced by null and stays null for later additions.
     *
     * @return empty if no values were added, otherwise the statistics
     *         (whose min and/or max may be null)
     */
    private Optional<StringStatistics> buildStringStatistics()
    {
        if (nonNullValueCount == 0) {
            return Optional.empty();
        }
        minimum = dropStringMinMaxIfNecessary(minimum);
        maximum = dropStringMinMaxIfNecessary(maximum);
        return Optional.of(new StringStatistics(minimum, maximum, sum));
    }

    /**
     * Builds the column statistics for everything accumulated so far.
     *
     * @return a {@link StringColumnStatistics} when at least one value was added,
     *         otherwise a plain {@link ColumnStatistics} carrying only the count and sizes
     */
    @Override
    public ColumnStatistics buildColumnStatistics()
    {
        Optional<StringStatistics> stringStatistics = buildStringStatistics();
        if (stringStatistics.isPresent()) {
            verify(nonNullValueCount > 0);
            return new StringColumnStatistics(nonNullValueCount, null, rawSize, storageSize, stringStatistics.get());
        }
        return new ColumnStatistics(nonNullValueCount, null, rawSize, storageSize);
    }

    /**
     * Adds {@code rawSize} to the raw-size counter reported by {@link #buildColumnStatistics()}.
     */
    @Override
    public void incrementRawSize(long rawSize)
    {
        this.rawSize += rawSize;
    }

    /**
     * Adds {@code storageSize} to the storage-size counter reported by {@link #buildColumnStatistics()}.
     */
    @Override
    public void incrementSize(long storageSize)
    {
        this.storageSize += storageSize;
    }

    /**
     * Merges the string statistics of several column statistics into one.
     * <p>
     * Entries reporting zero values are skipped.
     *
     * @param stats the column statistics to merge
     * @return empty if no entry contributes any values, or if any entry that has values
     *         lacks string statistics or has neither a min nor a max; otherwise the merged statistics
     */
    public static Optional<StringStatistics> mergeStringStatistics(List<ColumnStatistics> stats)
    {
        // no need to set the stats limit for the builder given we assume the given stats are within the same limit
        StringStatisticsBuilder stringStatisticsBuilder = new StringStatisticsBuilder(Integer.MAX_VALUE);
        for (ColumnStatistics columnStatistics : stats) {
            StringStatistics partialStatistics = columnStatistics.getStringStatistics();
            if (columnStatistics.getNumberOfValues() > 0) {
                if (partialStatistics == null || (partialStatistics.getMin() == null && partialStatistics.getMax() == null)) {
                    // there are non null values but no statistics, so we can not say anything about the data
                    return Optional.empty();
                }
                stringStatisticsBuilder.addStringStatistics(columnStatistics.getNumberOfValues(), partialStatistics);
            }
        }
        return stringStatisticsBuilder.buildStringStatistics();
    }

    /**
     * Applies the length limit to a minimum or maximum value.
     *
     * @param minOrMax the bound to check; may be null
     * @return null if {@code minOrMax} is null or longer than the limit; otherwise the
     *         value itself when it is compact, or a copy of it when it is not
     */
    private Slice dropStringMinMaxIfNecessary(Slice minOrMax)
    {
        if (minOrMax == null || minOrMax.length() > stringStatisticsLimitInBytes) {
            return null;
        }

        // Do not hold the entire slice where the actual stats could be small
        if (minOrMax.isCompact()) {
            return minOrMax;
        }
        return Slices.copyOf(minOrMax);
    }
}