TestHistogramCalculator.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.spi.statistics;
import com.facebook.presto.common.predicate.Range;
import org.testng.annotations.Test;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.statistics.HistogramCalculator.calculateFilterFactor;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.NaN;
import static java.lang.Double.POSITIVE_INFINITY;
import static org.testng.Assert.assertEquals;
public class TestHistogramCalculator
{
@Test
public void testCalculateFilterFactor()
{
Range zeroToTen = range(0, 10);
Range empty = Range.range(DOUBLE, NaN, true, NaN, true);
// Equal ranges
assertFilterFactor(Estimate.of(1.0), zeroToTen, 10, uniformHist(0, 10), 5);
assertFilterFactor(Estimate.of(1.0), zeroToTen, 10, uniformHist(0, 10), 20);
// Some overlap
assertFilterFactor(Estimate.of(0.5), range(5, 3000), 5, uniformHist(zeroToTen), 10);
// Single value overlap
assertFilterFactor(Estimate.of(1.0 / 10), range(3, 3), 1, uniformHist(zeroToTen), 10);
assertFilterFactor(Estimate.of(1.0 / 10), range(10, 100), 357, uniformHist(zeroToTen), 10);
// No overlap
assertFilterFactor(Estimate.zero(), range(20, 30), 10, uniformHist(zeroToTen), 10);
// Empty ranges
assertFilterFactor(Estimate.zero(), zeroToTen, 10, uniformHist(empty), 0);
assertFilterFactor(Estimate.zero(), empty, 0, uniformHist(zeroToTen), 10);
// no test for (empty, empty) since any return value is correct
assertFilterFactor(Estimate.zero(), unboundedRange(), 10, uniformHist(empty), 0);
assertFilterFactor(Estimate.zero(), empty, 0, uniformHist(unboundedRange()), 10);
// Unbounded (infinite), NDV-based
assertFilterFactor(Estimate.of(0.5), unboundedRange(), 10, uniformHist(unboundedRange()), 20);
assertFilterFactor(Estimate.of(1.0), unboundedRange(), 20, uniformHist(unboundedRange()), 10);
// NEW TESTS (TPC-H Q2)
// unbounded ranges
assertFilterFactor(Estimate.of(.5), unboundedRange(), 0.5, uniformHist(unboundedRange()), NaN);
// unbounded ranges with limited distinct values
assertFilterFactor(Estimate.of(0.2), unboundedRange(), 1.0,
domainConstrained(unboundedRange(), uniformHist(unboundedRange())), 5.0);
}
private static Range range(double low, double high)
{
return Range.range(DOUBLE, low, true, high, true);
}
private static Range unboundedRange()
{
return Range.all(DOUBLE);
}
private static void assertFilterFactor(Estimate expected, Range range, double distinctValues, ConnectorHistogram histogram, double totalDistinctValues)
{
assertEquals(
calculateFilterFactor(range, distinctValues, histogram, Estimate.estimateFromDouble(totalDistinctValues), true),
expected);
}
private static ConnectorHistogram uniformHist(Range range)
{
return uniformHist(range.getLow().getObjectValue().map(Double.class::cast).orElse(NEGATIVE_INFINITY),
range.getHigh().getObjectValue().map(Double.class::cast).orElse(POSITIVE_INFINITY));
}
private static ConnectorHistogram uniformHist(double low, double high)
{
return new UniformDistributionHistogram(low, high);
}
private static ConnectorHistogram domainConstrained(Range range, ConnectorHistogram source)
{
return DisjointRangeDomainHistogram.addDisjunction(source, range);
}
}