TestNoisyApproximateSetSfmAggregation.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation.noisyaggregation;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.SqlVarbinary;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.operator.aggregation.noisyaggregation.sketch.SfmSketch;
import org.testng.annotations.Test;
import static com.facebook.presto.block.BlockAssertions.createDoubleSequenceBlock;
import static com.facebook.presto.block.BlockAssertions.createLongSequenceBlock;
import static com.facebook.presto.block.BlockAssertions.createStringSequenceBlock;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
/**
* Tests for the noisy_approx_set_sfm function.
* Overall, these are similar to the tests of noisy_approx_distinct_sfm, but with an extra check
* to ensure that the size of the returned sketch matches the parameters specified (or defaulted).
*/
public class TestNoisyApproximateSetSfmAggregation
extends AbstractTestNoisySfmAggregation
{
protected String getFunctionName()
{
return "noisy_approx_set_sfm";
}
protected long getCardinalityFromResult(Object result)
{
return getSketchFromResult(result).cardinality();
}
private boolean sketchSizesMatch(Object a, Object b)
{
SfmSketch sketchA = getSketchFromResult(a);
SfmSketch sketchB = getSketchFromResult(b);
return sketchA.getBitmap().length() == sketchB.getBitmap().length();
}
private void assertSketchSize(Block valuesBlock, Type valueType, double epsilon, int numberOfBuckets, int precision, SqlVarbinary expected)
{
assertFunction(valuesBlock, valueType, epsilon, numberOfBuckets, precision, this::sketchSizesMatch, expected);
}
private void assertSketchSize(Block valuesBlock, Type valueType, double epsilon, int numberOfBuckets, SqlVarbinary expected)
{
assertFunction(valuesBlock, valueType, epsilon, numberOfBuckets, this::sketchSizesMatch, expected);
}
private void assertSketchSize(Block valuesBlock, Type valueType, double epsilon, SqlVarbinary expected)
{
assertFunction(valuesBlock, valueType, epsilon, this::sketchSizesMatch, expected);
}
@Test
public void testNonPrivateInteger()
{
Block valuesBlock = createLongSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createLongSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, refSketch, 0);
assertSketchSize(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, refSketch);
refSketch = toSqlVarbinary(createLongSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch, 0);
assertSketchSize(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch);
refSketch = toSqlVarbinary(createLongSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch, 0);
assertSketchSize(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch);
}
@Test
public void testPrivateInteger()
{
Block valuesBlock = createLongSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createLongSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, 8, refSketch, 50_000);
assertSketchSize(valuesBlock, BIGINT, 8, refSketch);
refSketch = toSqlVarbinary(createLongSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, 8, 8192, refSketch, 50_000);
assertSketchSize(valuesBlock, BIGINT, 8, 8192, refSketch);
refSketch = toSqlVarbinary(createLongSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, BIGINT, 8, 2048, 32, refSketch, 50_000);
assertSketchSize(valuesBlock, BIGINT, 8, 2048, 32, refSketch);
}
@Test
public void testNonPrivateDouble()
{
Block valuesBlock = createDoubleSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createDoubleSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, refSketch, 0);
assertSketchSize(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, refSketch);
refSketch = toSqlVarbinary(createDoubleSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch, 0);
assertSketchSize(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch);
refSketch = toSqlVarbinary(createDoubleSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch, 0);
assertSketchSize(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch);
}
@Test
public void testPrivateDouble()
{
Block valuesBlock = createDoubleSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createDoubleSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, 8, refSketch, 50_000);
assertSketchSize(valuesBlock, DOUBLE, 8, refSketch);
refSketch = toSqlVarbinary(createDoubleSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, 8, 8192, refSketch, 50_000);
assertSketchSize(valuesBlock, DOUBLE, 8, 8192, refSketch);
refSketch = toSqlVarbinary(createDoubleSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, DOUBLE, 8, 2048, 32, refSketch, 50_000);
assertSketchSize(valuesBlock, DOUBLE, 8, 2048, 32, refSketch);
}
@Test
public void testNonPrivateString()
{
Block valuesBlock = createStringSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createStringSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, refSketch, 0);
assertSketchSize(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, refSketch);
refSketch = toSqlVarbinary(createStringSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch, 0);
assertSketchSize(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 8192, refSketch);
refSketch = toSqlVarbinary(createStringSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch, 0);
assertSketchSize(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, refSketch);
}
@Test
public void testPrivateString()
{
Block valuesBlock = createStringSequenceBlock(1, 100_000);
SqlVarbinary refSketch = toSqlVarbinary(createStringSketch(4096, 24, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, 8, refSketch, 50_000);
assertSketchSize(valuesBlock, VARCHAR, 8, refSketch);
refSketch = toSqlVarbinary(createStringSketch(8192, 24, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, 8, 8192, refSketch, 50_000);
assertSketchSize(valuesBlock, VARCHAR, 8, 8192, refSketch);
refSketch = toSqlVarbinary(createStringSketch(2048, 32, 1, 100_000));
assertCardinality(valuesBlock, VARCHAR, 8, 2048, 32, refSketch, 50_000);
assertSketchSize(valuesBlock, VARCHAR, 8, 2048, 32, refSketch);
}
}