TestSfmSketch.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation.noisyaggregation.sketch;
import io.airlift.slice.Slice;
import org.openjdk.jol.info.ClassLayout;
import org.testng.annotations.Test;
import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertTrue;
public class TestSfmSketch
{
@Test
public void testComputeIndex()
{
for (int indexBitLength : new int[] {6, 12, 18}) {
long index = 5L;
long hash = index << (Long.SIZE - indexBitLength);
assertEquals(SfmSketch.computeIndex(hash, indexBitLength), index);
}
}
@Test
public void testIndexBitLength()
{
for (int i = 1; i < 20; i++) {
assertEquals(SfmSketch.indexBitLength((int) Math.pow(2, i)), i);
}
}
@Test
public void testNumberOfTrailingZeros()
{
for (int indexBitLength : new int[] {6, 12, 18}) {
for (int i = 0; i < Long.SIZE - 1; i++) {
long hash = 1L << i;
assertEquals(SfmSketch.numberOfTrailingZeros(hash, indexBitLength), Math.min(i, Long.SIZE - indexBitLength));
}
}
}
@Test
public void testNumberOfBuckets()
{
for (int i = 1; i < 20; i++) {
assertEquals(SfmSketch.numberOfBuckets(i), Math.round(Math.pow(2, i)));
}
}
@Test
public void testPowerOf2()
{
for (int i = 1; i < 20; i++) {
assertTrue(SfmSketch.isPowerOf2(Math.round(Math.pow(2, i))));
assertFalse(SfmSketch.isPowerOf2(Math.round(Math.pow(2, i)) + 1));
}
}
@Test
public void testRoundTrip()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
for (int i = 0; i < 100_000; i++) {
sketch.add(i);
}
sketch.enablePrivacy(2, new TestingSeededRandomizationStrategy(1));
Slice serialized = sketch.serialize();
SfmSketch unserialized = SfmSketch.deserialize(serialized);
assertSlicesEqual(serialized, unserialized.serialize());
}
@Test
public void testPrivacyEnabled()
{
SfmSketch sketch = SfmSketch.create(32, 24);
assertFalse(sketch.isPrivacyEnabled());
sketch.enablePrivacy(SfmSketch.NON_PRIVATE_EPSILON);
assertFalse(sketch.isPrivacyEnabled());
sketch.enablePrivacy(1.23);
assertTrue(sketch.isPrivacyEnabled());
}
@Test
public void testSerializedSize()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
sketch.enablePrivacy(1.23);
assertEquals(sketch.estimatedSerializedSize(), sketch.serialize().length());
}
@Test
public void testRetainedSize()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
sketch.enablePrivacy(4);
assertEquals(sketch.getRetainedSizeInBytes(),
ClassLayout.parseClass(SfmSketch.class).instanceSize() +
sketch.getBitmap().getRetainedSizeInBytes());
}
@Test
public void testBitmapSize()
{
int[] buckets = {32, 64, 512, 1024, 4096, 32768};
int[] precisions = {1, 2, 3, 8, 24, 32};
for (int numberOfBuckets : buckets) {
for (int precision : precisions) {
SfmSketch sketch = SfmSketch.create(numberOfBuckets, precision);
assertEquals(sketch.getBitmap().length(), numberOfBuckets * precision);
}
}
}
@Test
public void testMergeNonPrivate()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
SfmSketch sketch2 = SfmSketch.create(4096, 24);
// insert 100,000 non-negative integers + 100,000 negative integers
for (int i = 0; i < 100_000; i++) {
sketch.add(i);
sketch2.add(-i - 1);
}
Bitmap refBitmap = sketch.getBitmap().clone(); // clone old bitmap
sketch.mergeWith(sketch2);
// The two bitmaps should be merged with OR,
// and the resulting bitmap is not private.
refBitmap.or(sketch2.getBitmap());
assertEquals(sketch.getBitmap().toBytes(), refBitmap.toBytes());
assertFalse(sketch.isPrivacyEnabled());
}
@Test
public void testMergePrivate()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
SfmSketch sketch2 = SfmSketch.create(4096, 24);
// insert 100,000 non-negative integers + 100,000 negative integers
for (int i = 0; i < 100_000; i++) {
sketch.add(i);
sketch2.add(-i - 1);
}
Bitmap nonPrivateBitmap = sketch.getBitmap().clone();
Bitmap nonPrivateBitmap2 = sketch2.getBitmap().clone();
sketch.enablePrivacy(3, new TestingSeededRandomizationStrategy(1));
sketch2.enablePrivacy(4, new TestingSeededRandomizationStrategy(2));
double p1 = sketch.getRandomizedResponseProbability();
double p2 = sketch2.getRandomizedResponseProbability();
Bitmap refBitmap = sketch.getBitmap().clone(); // clone existing bitmap
refBitmap.or(sketch2.getBitmap()); // take an OR with sketch2, for later comparison
sketch.mergeWith(sketch2, new TestingSeededRandomizationStrategy(3));
// The resulting bitmap is a randomized merge equivalent to a noisy (not deterministic) OR.
// As a result, the bitmap should not equal an OR, but it should have roughly the same number
// of 1-bits as an OR that is flipped with the merged randomizedResponseProbability.
// The resulting merged sketch is private.
assertTrue(sketch.isPrivacyEnabled());
assertEquals(sketch.getRandomizedResponseProbability(), SfmSketch.mergeRandomizedResponseProbabilities(p1, p2));
assertNotEquals(sketch.getBitmap().toBytes(), refBitmap.toBytes());
int actualBitCount = sketch.getBitmap().getBitCount();
Bitmap hypotheticalBitmap = nonPrivateBitmap.clone();
hypotheticalBitmap.or(nonPrivateBitmap2);
hypotheticalBitmap.flipAll(sketch.getRandomizedResponseProbability(), new TestingSeededRandomizationStrategy(1));
// The number of 1-bits in the merged sketch should approximately equal the number of 1-bits in our hypothetical bitmap.
assertEquals(hypotheticalBitmap.getBitCount(), actualBitCount, 100);
}
@Test
public void testMergeMixed()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
SfmSketch sketch2 = SfmSketch.create(4096, 24);
for (int i = 0; i < 100_000; i++) {
sketch.add(i);
sketch2.add(-i - 1);
}
sketch2.enablePrivacy(3, new TestingSeededRandomizationStrategy(1));
Bitmap before = sketch.getBitmap().clone();
sketch.mergeWith(sketch2, new TestingSeededRandomizationStrategy(2));
// The resulting sketch is private.
assertTrue(sketch.isPrivacyEnabled());
// A mixed-privacy merge is mathematically similar to a normal private merge, but
// it turns out that some bits are deterministic. In particular, the bits of the
// merged sketch corresponding to 0s in the non-private sketch should exactly match
// the private sketch.
for (int i = 0; i < before.length(); i++) {
if (!before.getBit(i)) {
assertEquals(sketch.getBitmap().getBit(i), sketch2.getBitmap().getBit(i));
}
}
}
@Test
public void testMergedProbabilities()
{
// should be symmetric
assertEquals(SfmSketch.mergeRandomizedResponseProbabilities(0.1, 0.2), SfmSketch.mergeRandomizedResponseProbabilities(0.2, 0.1));
// private + nonprivate = private
assertEquals(SfmSketch.mergeRandomizedResponseProbabilities(0, 0.1), 0.1);
assertEquals(SfmSketch.mergeRandomizedResponseProbabilities(0.15, 0), 0.15);
// nonprivate + nonprivate = nonprivate
assertEquals(SfmSketch.mergeRandomizedResponseProbabilities(0.0, 0.0), 0.0);
// private + private = private (noisier)
// In particular, according to https://arxiv.org/pdf/2302.02056.pdf, Theorem 4.8, two sketches
// with epsilon1 and epsilon2 should have a merged epsilonStar of:
// -log(e^-epsilon1 + e^-epsilon2 - e^-(epsilon1 + epsilon2))
double epsilon1 = 1.2;
double epsilon2 = 3.4;
double p1 = SfmSketch.getRandomizedResponseProbability(epsilon1);
double p2 = SfmSketch.getRandomizedResponseProbability(epsilon2);
double epsilonStar = -Math.log(Math.exp(-epsilon1) + Math.exp(-epsilon2) - Math.exp(-(epsilon1 + epsilon2)));
double pStar = SfmSketch.getRandomizedResponseProbability(epsilonStar);
assertEquals(SfmSketch.mergeRandomizedResponseProbabilities(p1, p2), pStar, 1E-6);
// note: the merged sketch is noisier (higher probability of flipped bits)
assertTrue(pStar > Math.max(p1, p2));
}
@Test
public void testEmptySketchCardinality()
{
SfmSketch nonPrivateSketch = SfmSketch.create(4096, 24);
SfmSketch privateSketch = SfmSketch.create(4096, 24);
privateSketch.enablePrivacy(3, new TestingSeededRandomizationStrategy(1));
// Non-private should return exactly 0
assertEquals(nonPrivateSketch.cardinality(), 0);
// Private will be a noisy sketch, so it should return approximately zero, but will be rather noisy.
assertEquals(privateSketch.cardinality(), 0, 200);
}
@Test
public void testSmallCardinality()
{
int[] ns = {1, 5, 10, 50, 100, 200, 500, 1000};
for (int n : ns) {
SfmSketch nonPrivateSketch = SfmSketch.create(4096, 24);
SfmSketch privateSketch = SfmSketch.create(4096, 24);
for (int i = 0; i < n; i++) {
nonPrivateSketch.add(i);
privateSketch.add(i);
}
privateSketch.enablePrivacy(3, new TestingSeededRandomizationStrategy(1));
// Non-private should actually be quite good for small numbers
assertEquals(nonPrivateSketch.cardinality(), n, Math.max(10, 0.1 * n));
// Private isn't quite as good...
assertEquals(privateSketch.cardinality(), n, 200);
}
}
@Test
public void testActualCardinalityEstimates()
{
// Note: this is slow for cardinalities beyond, say, 1 million. See `testSimulatedCardinalityEstimates` below.
int[] magnitudes = {4, 5, 6};
double[] epsilons = {2, 4, SfmSketch.NON_PRIVATE_EPSILON};
for (int mag : magnitudes) {
int n = (int) Math.pow(10, mag);
for (double eps : epsilons) {
SfmSketch sketch = SfmSketch.create(4096, 24);
for (int i = 0; i < n; i++) {
sketch.add(i);
}
sketch.enablePrivacy(eps, new TestingSeededRandomizationStrategy(1));
assertEquals(sketch.cardinality(), n, n * 0.05); // answers should be accurate to within 5% (arbitrary)
}
}
}
@Test
public void testSimulatedCardinalityEstimates()
{
// Instead of creating sketches by adding items, we simulate them for fast testing of huge cardinalities.
// For reference, 10^33 is one decillion.
// The goal here is to test general functionality and numerical stability.
int[] magnitudes = {6, 9, 12, 15, 18, 21, 24, 27, 30, 33};
double[] epsilons = {4, SfmSketch.NON_PRIVATE_EPSILON};
for (int mag : magnitudes) {
int n = (int) Math.pow(10, mag);
for (double eps : epsilons) {
SfmSketch sketch = createSketchWithTargetCardinality(4096, 24, eps, n);
assertEquals(sketch.cardinality(), n, n * 0.1);
}
}
}
@Test
public void testMergedCardinalities()
{
double[] epsilons = {3, 4, SfmSketch.NON_PRIVATE_EPSILON};
// Test each pair of epsilons
// This gives us equal private epsilons, unequal private epsilons, mixed private and nonprivate, and totally nonprivate
for (double eps1 : epsilons) {
for (double eps2 : epsilons) {
SfmSketch sketch = SfmSketch.create(4096, 24);
SfmSketch sketch2 = SfmSketch.create(4096, 24);
// insert 300,000 positive integers and 200,000 negative integers
for (int i = 0; i < 300_000; i++) {
sketch.add(i + 1);
if (i < 200_000) {
sketch2.add(-i);
}
}
sketch.enablePrivacy(eps1, new TestingSeededRandomizationStrategy(1));
sketch2.enablePrivacy(eps2, new TestingSeededRandomizationStrategy(2));
sketch.mergeWith(sketch2);
assertEquals(sketch.cardinality(), 500_000, 500_000 * 0.1);
}
}
}
@Test
public void testEnablePrivacy()
{
SfmSketch sketch = SfmSketch.create(4096, 24);
double epsilon = 4;
for (int i = 0; i < 100_000; i++) {
sketch.add(i);
}
long cardinalityBefore = sketch.cardinality();
sketch.enablePrivacy(epsilon, new TestingSeededRandomizationStrategy(1));
long cardinalityAfter = sketch.cardinality();
// Randomized response probability should reflect the new (private) epsilon
assertEquals(sketch.getRandomizedResponseProbability(), SfmSketch.getRandomizedResponseProbability(epsilon));
assertTrue(sketch.isPrivacyEnabled());
// Cardinality should remain approximately the same
assertEquals(cardinalityAfter, cardinalityBefore, cardinalityBefore * 0.1);
}
private static SfmSketch createSketchWithTargetCardinality(int numberOfBuckets, int precision, double epsilon, int cardinality)
{
// Building a sketch by adding items is really slow (O(n)) if you want to test billions/trillions/quadrillions/etc.
// Simulating the sketch is much faster (O(buckets * precision)).
RandomizationStrategy randomizationStrategy = new TestingSeededRandomizationStrategy(1);
SfmSketch sketch = SfmSketch.create(numberOfBuckets, precision);
Bitmap bitmap = sketch.getBitmap();
double c1 = sketch.getOnProbability();
double c2 = sketch.getOnProbability() - sketch.getRandomizedResponseProbability();
for (int l = 0; l < precision; l++) {
double p = c1 - c2 * Math.pow(1 - Math.pow(2, -(l + 1)) / numberOfBuckets, cardinality);
for (int b = 0; b < numberOfBuckets; b++) {
bitmap.setBit(sketch.getBitLocation(b, l), randomizationStrategy.nextBoolean(p));
}
}
sketch.enablePrivacy(epsilon, randomizationStrategy);
return sketch;
}
}