// TestSfmSketchFunctions.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.scalar;
import com.facebook.presto.operator.aggregation.noisyaggregation.sketch.SfmSketch;
import com.google.common.io.BaseEncoding;
import org.testng.annotations.Test;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.testing.assertions.Assert.assertEquals;
/**
 * Tests for the SFM sketch scalar functions: {@code cardinality},
 * {@code noisy_empty_approx_set_sfm}, {@code merge_sfm}, and the casts
 * between {@code SFMSKETCH} and {@code VARBINARY}.
 */
public class TestSfmSketchFunctions
        extends AbstractTestFunctions
{
    /**
     * The scalar cardinality function applied to a serialized sketch must
     * agree with the cardinality reported by the in-memory sketch itself.
     */
    @Test
    public void testCardinality()
    {
        SfmSketch original = createSketch(1, 10_000, 4);
        long expected = original.cardinality();
        long actual = SfmSketchFunctions.cardinality(original.serialize());
        assertEquals(actual, expected);
    }

    /**
     * An empty approx set built with epsilon = infinity (i.e., no privacy)
     * must report a cardinality of zero, for each call form exercised here.
     */
    @Test
    public void testEmptyApproxSet()
    {
        String[] projections = {
                "cardinality(noisy_empty_approx_set_sfm(infinity()))",
                "cardinality(noisy_empty_approx_set_sfm(infinity(), 4096))",
                "cardinality(noisy_empty_approx_set_sfm(infinity(), 4096, 24))",
        };
        for (String projection : projections) {
            assertFunction(projection, BIGINT, 0L);
        }
    }

    /**
     * Casting an empty sketch to VARBINARY and back to SFMSKETCH must still
     * yield a sketch with zero cardinality.
     */
    @Test
    public void testCastRoundTrip()
    {
        String roundTripped = "cardinality(CAST(CAST(noisy_empty_approx_set_sfm(infinity()) AS VARBINARY) AS SFMSKETCH))";
        assertFunction(roundTripped, BIGINT, 0L);
    }

    /**
     * Merging an array that contains only NULL entries must produce NULL.
     */
    @Test
    public void testMergeNullArray()
    {
        String projection = "merge_sfm(ARRAY[NULL, NULL, NULL]) IS NULL";
        assertFunction(projection, BOOLEAN, true);
    }

    /**
     * Merging an empty array must produce NULL.
     */
    @Test
    public void testMergeEmptyArray()
    {
        String projection = "merge_sfm(ARRAY[]) IS NULL";
        assertFunction(projection, BOOLEAN, true);
    }

    /**
     * Merging an array holding a single sketch must give the same
     * cardinality as that sketch on its own.
     */
    @Test
    public void testMergeSingleArray()
    {
        // The projection is built once and reused so that both sides of the
        // comparison refer to exactly the same serialized sketch.
        String single = getSketchProjection(createSketch(1, 10_000, 3));
        String mergedCardinality = "cardinality(merge_sfm(ARRAY[" + single + "]))";
        String directCardinality = "cardinality(" + single + ")";
        assertFunction(mergedCardinality + " = " + directCardinality, BOOLEAN, true);
    }

    /**
     * Merging several sketches must be byte-for-byte equal (after a cast to
     * VARBINARY) to a single sketch built over the union of their inputs.
     * Non-private sketches are used so the comparison is deterministic.
     */
    @Test
    public void testMergeManyArrays()
    {
        String first = getSketchProjection(createSketch(1, 50, SfmSketch.NON_PRIVATE_EPSILON));
        String second = getSketchProjection(createSketch(51, 200, SfmSketch.NON_PRIVATE_EPSILON));
        String third = getSketchProjection(createSketch(100, 300, SfmSketch.NON_PRIVATE_EPSILON));
        String expected = getSketchProjection(createSketch(1, 300, SfmSketch.NON_PRIVATE_EPSILON));

        String sketches = "ARRAY[" + String.join(", ", first, second, third) + "]";
        String mergedBinary = "CAST(merge_sfm(" + sketches + ") AS VARBINARY)";
        String expectedBinary = "CAST(" + expected + " AS VARBINARY)";
        assertFunction(mergedBinary + " = " + expectedBinary, BOOLEAN, true);
    }

    /**
     * Builds a sketch containing every integer in {@code [start, end]}
     * (both bounds inclusive).
     *
     * @param start first value added to the sketch
     * @param end last value added to the sketch
     * @param epsilon privacy parameter; privacy is enabled only when this is
     *        strictly less than {@link SfmSketch#NON_PRIVATE_EPSILON}
     * @return the populated sketch
     */
    private SfmSketch createSketch(int start, int end, double epsilon)
    {
        // Fixed construction arguments shared by every test in this class
        // (presumably the sketch's size parameters; see SfmSketch.create).
        SfmSketch result = SfmSketch.create(2048, 16);

        for (int value = start; value <= end; value++) {
            result.add(value);
        }

        boolean privacyRequested = epsilon < SfmSketch.NON_PRIVATE_EPSILON;
        if (privacyRequested) {
            result.enablePrivacy(epsilon);
        }
        return result;
    }

    /**
     * Renders a sketch as a SQL expression: its serialized bytes as a
     * lower-case hexadecimal binary literal, cast to SFMSKETCH.
     *
     * @param sketch the sketch to embed in a query
     * @return SQL text of the form {@code CAST(X'<hex>' AS SFMSKETCH)}
     */
    private String getSketchProjection(SfmSketch sketch)
    {
        String hex = BaseEncoding.base16().lowerCase().encode(sketch.serialize().getBytes());
        return "CAST(X'" + hex + "' AS SFMSKETCH)";
    }
}