TestNoisyApproximateCountDistinctSfmAggregation.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.operator.aggregation.noisyaggregation;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.operator.aggregation.noisyaggregation.sketch.SfmSketch;
import org.testng.annotations.Test;

import static com.facebook.presto.block.BlockAssertions.createDoubleSequenceBlock;
import static com.facebook.presto.block.BlockAssertions.createLongSequenceBlock;
import static com.facebook.presto.block.BlockAssertions.createStringSequenceBlock;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;

/**
 * Cardinality tests for the {@code noisy_approx_distinct_sfm} aggregation over
 * BIGINT, DOUBLE and VARCHAR input blocks.
 *
 * <p>Each input type is exercised twice: once with {@link SfmSketch#NON_PRIVATE_EPSILON},
 * asserting an exact estimate (tolerance 0), and once with epsilon 8, asserting the
 * estimate falls within a wide tolerance of 100,000.
 *
 * <p>NOTE(review): {@code assertCardinality} is inherited from
 * {@link AbstractTestNoisySfmAggregation}. The trailing two arguments appear to be the
 * expected estimate and the allowed absolute error; the optional integer arguments
 * between epsilon and the expected value are presumably the sketch's bucket count and
 * precision — confirm against the base class.
 */
public class TestNoisyApproximateCountDistinctSfmAggregation
        extends AbstractTestNoisySfmAggregation
{
    /**
     * Returns the SQL name of the aggregation function under test.
     *
     * @return the literal {@code "noisy_approx_distinct_sfm"}
     */
    protected String getFunctionName()
    {
        return "noisy_approx_distinct_sfm";
    }

    /**
     * Converts an aggregation result into a cardinality by parsing its string form.
     *
     * @param result the aggregation output; its {@code toString()} must be a decimal long
     * @return the parsed cardinality
     * @throws NumberFormatException if the string form of {@code result} is not a valid long
     * @throws NullPointerException if {@code result} is null
     */
    protected long getCardinalityFromResult(Object result)
    {
        // Long.parseLong returns the primitive directly; the Long(String) constructor
        // is deprecated and would allocate a box only to unbox it again.
        return Long.parseLong(result.toString());
    }

    /**
     * Checks exact estimates for a BIGINT sequence block when no privacy noise is applied.
     */
    @Test
    public void testNonPrivateIntegerCount()
    {
        Block valuesBlock = createLongSequenceBlock(1, 100_000);
        // These estimates are deterministic (no privacy).
        assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 99_466, 0);
        assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 8192, 100_219, 0);
        assertCardinality(valuesBlock, BIGINT, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, 100_102, 0);
    }

    /**
     * Checks that estimates for a BIGINT sequence block stay within tolerance at epsilon 8.
     */
    @Test
    public void testPrivateIntegerCount()
    {
        Block valuesBlock = createLongSequenceBlock(1, 100_000);
        // These estimates are random, but not too noisy.
        assertCardinality(valuesBlock, BIGINT, 8, 100_000, 25_000);
        assertCardinality(valuesBlock, BIGINT, 8, 8192, 100_000, 25_000);
        assertCardinality(valuesBlock, BIGINT, 8, 2048, 32, 100_000, 25_000);
    }

    /**
     * Checks exact estimates for a DOUBLE sequence block when no privacy noise is applied.
     */
    @Test
    public void testNonPrivateDoubleCount()
    {
        Block valuesBlock = createDoubleSequenceBlock(1, 100_000);
        // These estimates are deterministic (no privacy).
        assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 99_670, 0);
        assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 8192, 100_078, 0);
        assertCardinality(valuesBlock, DOUBLE, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, 98_350, 0);
    }

    /**
     * Checks that estimates for a DOUBLE sequence block stay within tolerance at epsilon 8.
     */
    @Test
    public void testPrivateDoubleCount()
    {
        Block valuesBlock = createDoubleSequenceBlock(1, 100_000);
        // These estimates are random, but not too noisy.
        assertCardinality(valuesBlock, DOUBLE, 8, 100_000, 25_000);
        assertCardinality(valuesBlock, DOUBLE, 8, 8192, 100_000, 25_000);
        assertCardinality(valuesBlock, DOUBLE, 8, 2048, 32, 100_000, 25_000);
    }

    /**
     * Checks exact estimates for a VARCHAR sequence block when no privacy noise is applied.
     */
    @Test
    public void testNonPrivateStringCount()
    {
        Block valuesBlock = createStringSequenceBlock(1, 100_000);
        // These estimates are deterministic (no privacy).
        assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 100_190, 0);
        assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 8192, 99_982, 0);
        assertCardinality(valuesBlock, VARCHAR, SfmSketch.NON_PRIVATE_EPSILON, 2048, 32, 100_773, 0);
    }

    /**
     * Checks that estimates for a VARCHAR sequence block stay within tolerance at epsilon 8.
     */
    @Test
    public void testPrivateStringCount()
    {
        Block valuesBlock = createStringSequenceBlock(1, 100_000);
        // These estimates are random, but not too noisy.
        assertCardinality(valuesBlock, VARCHAR, 8, 100_000, 25_000);
        assertCardinality(valuesBlock, VARCHAR, 8, 8192, 100_000, 25_000);
        assertCardinality(valuesBlock, VARCHAR, 8, 2048, 32, 100_000, 25_000);
    }
}