/src/tor/src/lib/container/bloomfilt.c
Line | Count | Source |
1 | | /* Copyright (c) 2003-2004, Roger Dingledine |
2 | | * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. |
3 | | * Copyright (c) 2007-2021, The Tor Project, Inc. */ |
4 | | /* See LICENSE for licensing information */ |
5 | | |
6 | | /** |
7 | | * \file bloomfilt.c |
8 | | * \brief Uses bitarray_t to implement a bloom filter. |
9 | | **/ |
10 | | |
11 | | #include <stdlib.h> |
12 | | |
13 | | #include "lib/malloc/malloc.h" |
14 | | #include "lib/container/bloomfilt.h" |
15 | | #include "lib/intmath/bits.h" |
16 | | #include "lib/log/util_bug.h" |
17 | | #include "ext/siphash.h" |
18 | | |
19 | | /** How many bloom-filter bits we set per address. This is twice the |
20 | | * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit |
21 | | * values. */ |
22 | 0 | #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2) |
23 | | |
24 | | struct bloomfilt_t { |
25 | | /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each |
26 | | * items. */ |
27 | | struct sipkey key[BLOOMFILT_N_HASHES]; |
28 | | bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */ |
29 | | uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always |
30 | | * one less than a power of two. */ |
31 | | bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ |
32 | | }; |
33 | | |
34 | 0 | #define BIT(set, n) ((n) & (set)->mask) |
35 | | |
36 | | /** Add the element <b>item</b> to <b>set</b>. */ |
37 | | void |
38 | | bloomfilt_add(bloomfilt_t *set, |
39 | | const void *item) |
40 | 0 | { |
41 | 0 | int i; |
42 | 0 | for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { |
43 | 0 | uint64_t h = set->hashfn(&set->key[i], item); |
44 | 0 | uint32_t high_bits = (uint32_t)(h >> 32); |
45 | 0 | uint32_t low_bits = (uint32_t)(h); |
46 | 0 | bitarray_set(set->ba, BIT(set, high_bits)); |
47 | 0 | bitarray_set(set->ba, BIT(set, low_bits)); |
48 | 0 | } |
49 | 0 | } |
50 | | |
51 | | /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise, |
52 | | * <em>probably</em> return zero. */ |
53 | | int |
54 | | bloomfilt_probably_contains(const bloomfilt_t *set, |
55 | | const void *item) |
56 | 0 | { |
57 | 0 | int i, matches = 0; |
58 | 0 | for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { |
59 | 0 | uint64_t h = set->hashfn(&set->key[i], item); |
60 | 0 | uint32_t high_bits = (uint32_t)(h >> 32); |
61 | 0 | uint32_t low_bits = (uint32_t)(h); |
62 | | // Note that !! is necessary here, since bitarray_is_set does not |
63 | | // necessarily return 1 on true. |
64 | 0 | matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); |
65 | 0 | matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); |
66 | 0 | } |
67 | 0 | return matches == N_BITS_PER_ITEM; |
68 | 0 | } |
69 | | |
70 | | /** Return a newly allocated bloomfilt_t, optimized to hold a total of |
71 | | * <b>max_elements</b> elements with a reasonably low false positive weight. |
72 | | * |
73 | | * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide |
74 | | * functions of the items, and the key material <b>random_key</b> to |
75 | | * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key. |
76 | | **/ |
77 | | bloomfilt_t * |
78 | | bloomfilt_new(int max_elements, |
79 | | bloomfilt_hash_fn hashfn, |
80 | | const uint8_t *random_key) |
81 | 0 | { |
82 | | /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k |
83 | | * is the number of hash functions per entry, m is the bits in the array, |
84 | | * and n is the number of elements inserted. For us, k==4, n<=max_elements, |
85 | | * and m==n_bits= approximately max_elements*32. This gives |
86 | | * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019 |
87 | | * |
88 | | * It would be more optimal in space vs false positives to get this false |
89 | | * positive rate by going for k==13, and m==18.5n, but we also want to |
90 | | * conserve CPU, and k==13 is pretty big. |
91 | | */ |
92 | 0 | int n_bits = 1u << (tor_log2(max_elements)+5); |
93 | 0 | bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t)); |
94 | 0 | r->mask = n_bits - 1; |
95 | 0 | r->ba = bitarray_init_zero(n_bits); |
96 | |
|
97 | 0 | tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN); |
98 | 0 | memcpy(r->key, random_key, sizeof(r->key)); |
99 | |
|
100 | 0 | r->hashfn = hashfn; |
101 | |
|
102 | 0 | return r; |
103 | 0 | } |
104 | | |
105 | | /** Free all storage held in <b>set</b>. */ |
106 | | void |
107 | | bloomfilt_free_(bloomfilt_t *set) |
108 | 0 | { |
109 | 0 | if (!set) |
110 | 0 | return; |
111 | 0 | bitarray_free(set->ba); |
112 | | tor_free(set); |
113 | 0 | } |