AbstractTestFixedHistogramAggregation.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation.differentialentropy;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction;
import com.google.common.collect.ImmutableList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static com.facebook.presto.block.BlockAssertions.createRLEBlock;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
/**
 * Base class for tests of the {@code differential_entropy} aggregation that pass a
 * method name plus fixed-histogram arguments.
 *
 * <p>It builds the six input blocks handed to the aggregation and offers static helpers
 * that subclasses can use to produce the same samples/weights as plain lists and to
 * compute a discrete, weighted entropy (in bits) over them.
 */
abstract class AbstractTestFixedHistogramAggregation
        extends AbstractTestAggregationFunction
{
    // Also used as the first (integer) argument and as the last (double) argument of the
    // function call; samples are clamped to [0, NUM_BINS - 1].
    private static final int NUM_BINS = 5;

    /** Method name passed as the VARCHAR argument of the aggregation. */
    protected final String method;

    /**
     * @param method value written into the VARCHAR argument block for every position
     */
    protected AbstractTestFixedHistogramAggregation(String method)
    {
        this.method = method;
    }

    /**
     * Builds the argument blocks for the aggregation call.
     *
     * <p>Every index in {@code [start, start + length)} is emitted twice: once with weight 1
     * and once with weight 2, so each block holds {@code 2 * length} positions. The blocks
     * are, in order: NUM_BINS, samples, weights, method, 0.0 and NUM_BINS as a double
     * (presumably bucket count, x, weight, method, min, max - confirm against the
     * function's signature).
     *
     * @param start first index of the sequence
     * @param length number of consecutive indices
     * @return the six blocks, in the order of {@link #getFunctionParameterTypes()}
     */
    @Override
    public Block[] getSequenceBlocks(int start, int length)
    {
        int positionCount = 2 * length;
        BlockBuilder samples = DOUBLE.createBlockBuilder(null, positionCount);
        BlockBuilder weights = DOUBLE.createBlockBuilder(null, positionCount);
        forEachWeightedSample(start, length, (sample, weight) -> {
            DOUBLE.writeDouble(samples, sample);
            DOUBLE.writeDouble(weights, weight);
        });

        return new Block[] {
                createRLEBlock(NUM_BINS, positionCount),
                samples.build(),
                weights.build(),
                createRLEBlock(this.method, positionCount),
                createRLEBlock(0.0, positionCount),
                createRLEBlock((double) NUM_BINS, positionCount)
        };
    }

    @Override
    protected String getFunctionName()
    {
        return "differential_entropy";
    }

    /**
     * @return the argument types, matching the block order of {@link #getSequenceBlocks(int, int)}
     */
    @Override
    protected List<String> getFunctionParameterTypes()
    {
        return ImmutableList.of(
                StandardTypes.INTEGER,
                StandardTypes.DOUBLE,
                StandardTypes.DOUBLE,
                StandardTypes.VARCHAR,
                StandardTypes.DOUBLE,
                StandardTypes.DOUBLE);
    }

    /**
     * Appends to the given lists the same (sample, weight) pairs, in the same order, that
     * {@link #getSequenceBlocks(int, int)} writes into its sample and weight blocks.
     *
     * @param start first index of the sequence
     * @param length number of consecutive indices
     * @param samples list receiving the clamped sample values
     * @param weights list receiving the corresponding weights
     */
    protected static void generateSamplesAndWeights(int start, int length, List<Double> samples, List<Double> weights)
    {
        forEachWeightedSample(start, length, (sample, weight) -> {
            samples.add(sample);
            weights.add(weight);
        });
    }

    /**
     * Computes the weighted entropy, in bits, of the discrete distribution obtained by
     * grouping equal sample values and summing their weights.
     *
     * @param samples sample values; equal values fall into the same bucket
     * @param weights weight of each sample, positionally aligned with {@code samples}
     * @return the entropy in bits, or {@code NaN} when the total weight is zero
     * @throws IllegalArgumentException if the two lists differ in size
     */
    protected static double calculateEntropy(List<Double> samples, List<Double> weights)
    {
        // A mismatch would otherwise either throw an opaque IndexOutOfBoundsException or,
        // worse, count weights in the total that never reach a bucket.
        if (samples.size() != weights.size()) {
            throw new IllegalArgumentException(
                    "samples and weights must have the same size, got " + samples.size() + " and " + weights.size());
        }

        double totalWeight = weights.stream().mapToDouble(weight -> weight).sum();
        if (totalWeight == 0.0) {
            return Double.NaN;
        }

        Map<Double, Double> bucketWeights = new HashMap<>();
        for (int i = 0; i < samples.size(); i++) {
            bucketWeights.merge(samples.get(i), weights.get(i), Double::sum);
        }

        // Sum of p * ln(1 / p) with p = weight / totalWeight; empty buckets contribute nothing.
        double entropy = bucketWeights.values().stream()
                .mapToDouble(weight -> weight == 0.0 ? 0.0 : weight / totalWeight * Math.log(totalWeight / weight))
                .sum();
        // Convert from nats to bits.
        return entropy / Math.log(2);
    }

    /**
     * Single source of truth for the generated test data: for weight 1 and then weight 2,
     * visits every index in {@code [start, start + length)} clamped to a valid bin.
     */
    private static void forEachWeightedSample(int start, int length, WeightedSampleConsumer consumer)
    {
        for (int weight = 1; weight < 3; weight++) {
            for (int i = start; i < start + length; i++) {
                int bin = Math.max(Math.min(i, NUM_BINS - 1), 0);
                consumer.accept(bin, weight);
            }
        }
    }

    /** Receives one generated (sample, weight) pair. */
    @FunctionalInterface
    private interface WeightedSampleConsumer
    {
        void accept(double sample, double weight);
    }
}