EntropyCalculations.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation.differentialentropy;
import java.util.Arrays;
import static com.google.common.base.Verify.verify;
import static java.lang.Math.toIntExact;
public class EntropyCalculations
{
private EntropyCalculations() {}
/**
* @implNote Based on Alizadeh Noughabi, Hadi & Arghami, N. (2010). "A New Estimator of Entropy".
*/
public static double calculateFromSamplesUsingVasicek(double[] samples)
{
if (samples.length == 0) {
return Double.NaN;
}
Arrays.sort(samples);
int n = samples.length;
int m = toIntExact(Math.max(Math.round(Math.sqrt(n)), 2));
double entropy = 0;
for (int i = 0; i < n; i++) {
double sIPlusM = i + m < n ? samples[i + m] : samples[n - 1];
double sIMinusM = i - m > 0 ? samples[i - m] : samples[0];
double aI = i + m < n && i - m > 0 ? 2 : 1;
entropy += Math.log(n / (aI * m) * (sIPlusM - sIMinusM));
}
return entropy / n / Math.log(2);
}
static double calculateEntropyFromHistogramAggregates(double width, double sumWeight, double sumWeightLogWeight)
{
verify(sumWeight > 0.0);
return Math.max((Math.log(width * sumWeight) - sumWeightLogWeight / sumWeight) / Math.log(2.0), 0.0);
}
}