// KllHistogram.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg.statistics;
import com.facebook.presto.common.type.AbstractIntType;
import com.facebook.presto.common.type.AbstractLongType;
import com.facebook.presto.common.type.AbstractVarcharType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.statistics.ConnectorHistogram;
import com.facebook.presto.spi.statistics.Estimate;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.apache.datasketches.common.ArrayOfBooleansSerDe;
import org.apache.datasketches.common.ArrayOfDoublesSerDe;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.ArrayOfLongsSerDe;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.kll.KllItemsSketch;
import org.apache.datasketches.memory.Memory;
import org.openjdk.jol.info.ClassLayout;
import java.util.Comparator;
import java.util.function.Function;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.Decimals.isLongDecimal;
import static com.facebook.presto.common.type.Decimals.isShortDecimal;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.RealType.REAL;
import static com.facebook.presto.common.type.SmallintType.SMALLINT;
import static com.facebook.presto.common.type.TinyintType.TINYINT;
import static com.facebook.presto.common.type.TypeUtils.isNumericType;
import static com.facebook.presto.spi.StandardErrorCode.INVALID_ARGUMENTS;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Verify.verify;
import static java.nio.ByteOrder.LITTLE_ENDIAN;
import static java.util.Objects.requireNonNull;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
/**
 * A {@link ConnectorHistogram} backed by a serialized Apache DataSketches KLL items sketch.
 * <p>
 * The sketch stores either {@code Double} or {@code Long} items depending on the Presto
 * {@link Type} it was built for, while the {@link ConnectorHistogram} API speaks only in
 * {@code double}. This class converts between the two representations.
 */
public class KllHistogram
        implements ConnectorHistogram
{
    private static final long INSTANCE_SIZE = ClassLayout.parseClass(KllHistogram.class).instanceSize();

    // since the actual type parameter is only known at runtime, we can't concretely specify it
    private final KllItemsSketch<Object> sketch;
    private final Type type;
    // converts an item returned by the sketch into the double the histogram API exposes
    private final Function<Object, Double> toDouble;
    // converts a double supplied through the histogram API into the item class the sketch holds
    private final Function<Double, Object> fromDouble;
    // true when the sketch holds Long items; fractional query values then need floor semantics
    private final boolean integralItems;

    /**
     * Wraps a serialized KLL sketch.
     *
     * @param bytes the serialized sketch, read as little-endian memory
     * @param type the Presto type of the values the sketch was built from
     * @throws NullPointerException if {@code type} or {@code bytes} is null
     * @throws com.google.common.base.VerifyException if {@code type} is rejected by
     * {@link #isKllHistogramSupportedType(Type)}
     * @throws PrestoException if the item class selected for {@code type} is neither
     * {@code Double} nor {@code Long}
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    @JsonCreator
    public KllHistogram(@JsonProperty("sketch") Slice bytes, @JsonProperty("type") Type type)
    {
        // null-check before the first dereference so a missing type fails with a clear message
        this.type = requireNonNull(type, "type is null");
        // template form: the message is only formatted when the check actually fails
        verify(isKllHistogramSupportedType(type), "histograms do not currently support type %s", type.getDisplayName());
        requireNonNull(bytes, "bytes is null");

        SketchParameters parameters = getSketchParameters(type);
        Class<?> itemClass = parameters.getSerde().getClassOfT();

        // the actual sketch can only accept the same object types which generated it
        // however, the API can only accept or generate double types. We cast the inputs
        // and results to/from double to satisfy the underlying sketch type.
        if (itemClass.equals(Double.class)) {
            toDouble = x -> (double) x;
            fromDouble = x -> x;
            integralItems = false;
        }
        else if (itemClass.equals(Long.class)) {
            // dual cast to auto-box/unbox from Double/Long for sketch
            toDouble = x -> (double) (long) x;
            fromDouble = x -> (long) (double) x;
            integralItems = true;
        }
        else {
            throw new PrestoException(INVALID_ARGUMENTS, "can't create kll sketch from type: " + type);
        }

        sketch = KllItemsSketch.wrap(Memory.wrap(bytes.toByteBuffer(), LITTLE_ENDIAN), parameters.getComparator(), parameters.getSerde());
    }

    /**
     * Reports whether a histogram can be created for the given type.
     *
     * @param type the Presto type to test
     * @return true if the type is numeric or an {@link AbstractIntType}; false otherwise,
     * including when the numeric check raises a {@link PrestoException}
     */
    public static boolean isKllHistogramSupportedType(Type type)
    {
        try {
            return isNumericType(type) ||
                    type instanceof AbstractIntType;
        }
        catch (PrestoException e) {
            return false;
        }
    }

    /**
     * @return the sketch serialized to a byte array and wrapped in a {@link Slice}
     */
    @JsonProperty
    public Slice getSketch()
    {
        return Slices.wrappedBuffer(sketch.toByteArray());
    }

    /**
     * @return the Presto type this histogram was created for
     */
    @JsonProperty
    public Type getType()
    {
        return type;
    }

    /**
     * @return the underlying sketch, exposed for tests
     */
    @VisibleForTesting
    @SuppressWarnings("rawtypes")
    public KllItemsSketch getKllSketch()
    {
        return sketch;
    }

    /**
     * Estimates the fraction of values that are less than (or, when {@code inclusive}, less
     * than or equal to) {@code value}.
     *
     * @param value the point at which to evaluate the distribution
     * @param inclusive whether items equal to {@code value} are counted
     * @return the rank reported by the sketch, or {@link Estimate#unknown()} when
     * {@code value} is NaN or the sketch holds no items
     */
    @Override
    public Estimate cumulativeProbability(double value, boolean inclusive)
    {
        // a NaN bound has no position in the distribution, and an empty sketch cannot answer rank queries
        if (Double.isNaN(value) || sketch.isEmpty()) {
            return Estimate.unknown();
        }

        double probe = value;
        boolean probeInclusive = inclusive;
        if (integralItems && probe != Math.floor(probe)) {
            // For integer items and a fractional bound v, P(X < v) == P(X <= v) == P(X <= floor(v)).
            // A plain (long) cast truncates toward zero, which is wrong for negative bounds and
            // for exclusive queries with positive bounds.
            probe = Math.floor(probe);
            probeInclusive = true;
        }
        return Estimate.of(sketch.getRank(fromDouble.apply(probe), probeInclusive ? INCLUSIVE : EXCLUSIVE));
    }

    /**
     * Estimates the value found at the given percentile of the distribution.
     *
     * @param percentile the normalized rank to look up, passed unchanged to the sketch
     * @return the quantile converted to a double, or {@link Estimate#unknown()} when the
     * sketch holds no items
     */
    @Override
    public Estimate inverseCumulativeProbability(double percentile)
    {
        if (sketch.isEmpty()) {
            return Estimate.unknown();
        }
        return Estimate.of(toDouble.apply(sketch.getQuantile(percentile)));
    }

    /**
     * The memory utilization is dominated by the size of the sketch. The estimate is the
     * serialized size of the sketch plus the instance size of this class; objects
     * referenced by the other fields are not measured.
     */
    @Override
    public long getEstimatedSize()
    {
        return INSTANCE_SIZE + sketch.getSerializedSizeBytes();
    }

    /**
     * Summarizes the sketch. Min, max and the sample quantiles are left out when the
     * sketch holds no items, so that rendering an empty histogram does not fail.
     */
    @Override
    public String toString()
    {
        boolean empty = sketch.isEmpty();
        return toStringHelper(this)
                // only the conditionally-computed entries below can be null
                .omitNullValues()
                .add("type", type)
                .add("k", sketch.getK())
                .add("N", sketch.getN())
                .add("retained", sketch.getNumRetained())
                .add("min", empty ? null : sketch.getMinItem())
                .add("max", empty ? null : sketch.getMaxItem())
                .add("p50", quantileOrNull(empty, 0.5))
                .add("p75", quantileOrNull(empty, 0.75))
                .add("p90", quantileOrNull(empty, 0.90))
                .add("p99", quantileOrNull(empty, 0.99))
                .add("p99.9", quantileOrNull(empty, 0.999))
                .toString();
    }

    // Returns the sketch quantile at the given rank, or null when the sketch has no items to query.
    private Object quantileOrNull(boolean empty, double rank)
    {
        return empty ? null : sketch.getQuantile(rank);
    }

    /**
     * Pairs the comparator and the serializer/deserializer needed to wrap a sketch of {@code T} items.
     */
    private static class SketchParameters<T>
    {
        private final Comparator<T> comparator;
        private final ArrayOfItemsSerDe<T> serde;

        public SketchParameters(Comparator<T> comparator, ArrayOfItemsSerDe<T> serde)
        {
            this.comparator = comparator;
            this.serde = serde;
        }

        public Comparator<T> getComparator()
        {
            return comparator;
        }

        public ArrayOfItemsSerDe<T> getSerde()
        {
            return serde;
        }
    }

    /**
     * Selects the comparator and serde matching the item class used for the given type.
     *
     * @param type the Presto type of the sketched values
     * @return the parameters needed to wrap a sketch for {@code type}
     * @throws PrestoException if no item class is defined for {@code type}
     */
    private static SketchParameters<?> getSketchParameters(Type type)
    {
        // REAL, DOUBLE and both decimal flavors all use Double items. This test must stay
        // ahead of the AbstractIntType test below so that REAL is matched here first.
        if (type.equals(REAL) || isShortDecimal(type) || isLongDecimal(type) || type.equals(DOUBLE)) {
            return new SketchParameters<>(Double::compareTo, new ArrayOfDoublesSerDe());
        }
        if (type.equals(BOOLEAN)) {
            return new SketchParameters<>(Boolean::compareTo, new ArrayOfBooleansSerDe());
        }
        if (type instanceof AbstractIntType || type instanceof AbstractLongType || type.equals(SMALLINT) || type.equals(TINYINT)) {
            return new SketchParameters<>(Long::compareTo, new ArrayOfLongsSerDe());
        }
        if (type instanceof AbstractVarcharType) {
            return new SketchParameters<>(String::compareTo, new ArrayOfStringsSerDe());
        }
        throw new PrestoException(INVALID_ARGUMENTS, "Unsupported type for KLL sketch: " + type);
    }
}