HyperLogLogFunctions.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.scalar;
import com.facebook.airlift.stats.cardinality.HyperLogLog;
import com.facebook.airlift.stats.cardinality.PrivateLpcaSketch;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlNullable;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import static com.facebook.presto.operator.aggregation.ApproximateSetAggregation.DEFAULT_STANDARD_ERROR;
import static com.facebook.presto.operator.aggregation.HyperLogLogUtils.standardErrorToBuckets;
import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static com.facebook.presto.spi.function.SqlFunctionVisibility.EXPERIMENTAL;
import static com.facebook.presto.util.Failures.checkCondition;
public final class HyperLogLogFunctions
{
private HyperLogLogFunctions() {}
@ScalarFunction
@Description("compute the cardinality of a HyperLogLog instance")
@SqlType(StandardTypes.BIGINT)
public static long cardinality(@SqlType(StandardTypes.HYPER_LOG_LOG) Slice serializedHll)
{
return HyperLogLog.newInstance(serializedHll).cardinality();
}
@ScalarFunction(visibility = EXPERIMENTAL)
@Description("compute the noisy cardinality of a HyperLogLog instance")
@SqlType(StandardTypes.BIGINT)
public static long noisyCardinality(@SqlType(StandardTypes.HYPER_LOG_LOG) Slice serializedHll, @SqlType(StandardTypes.DOUBLE) double epsilon)
{
checkCondition(epsilon > 0, INVALID_FUNCTION_ARGUMENT, "Epsilon must be greater than 0");
PrivateLpcaSketch privacySketch = new PrivateLpcaSketch(HyperLogLog.newInstance(serializedHll), 0.1 * epsilon, 0.9 * epsilon);
return privacySketch.cardinality();
}
@ScalarFunction
@Description("an empty HyperLogLog instance")
@SqlType(StandardTypes.HYPER_LOG_LOG)
public static Slice emptyApproxSet()
{
return HyperLogLog.newInstance(standardErrorToBuckets(DEFAULT_STANDARD_ERROR)).serialize();
}
@ScalarFunction
@Description("an empty HyperLogLog instance with the specified max standard error")
@SqlType(StandardTypes.HYPER_LOG_LOG)
public static Slice emptyApproxSet(@SqlType(StandardTypes.DOUBLE) double maxStandardError)
{
return HyperLogLog.newInstance(standardErrorToBuckets(maxStandardError)).serialize();
}
@ScalarFunction("merge_hll")
@Description("merge the contents of an array of HyperLogLogs")
@SqlType(StandardTypes.HYPER_LOG_LOG)
@SqlNullable
public static Slice scalarMerge(@SqlType("array(HyperLogLog)") Block block)
{
if (block.getPositionCount() == 0) {
return null;
}
HyperLogLog merged = null;
int firstNonNullIndex = 0;
while (firstNonNullIndex < block.getPositionCount() && block.isNull(firstNonNullIndex)) {
firstNonNullIndex++;
}
if (firstNonNullIndex == block.getPositionCount()) {
return null;
}
Slice initialSlice = block.getSlice(firstNonNullIndex, 0, block.getSliceLength(firstNonNullIndex));
merged = HyperLogLog.newInstance(initialSlice);
for (int i = firstNonNullIndex + 1; i < block.getPositionCount(); i++) {
Slice currentSlice = block.getSlice(i, 0, block.getSliceLength(i));
if (!block.isNull(i)) {
merged.mergeWith(HyperLogLog.newInstance(currentSlice));
}
}
return merged.serialize();
}
}