Coverage Report

Created: 2024-04-24 06:23

/src/tarantool/src/lib/salad/bloom.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef TARANTOOL_LIB_SALAD_BLOOM_H_INCLUDED
2
#define TARANTOOL_LIB_SALAD_BLOOM_H_INCLUDED
3
/*
4
 * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
5
 *
6
 * Redistribution and use in source and binary forms, with or
7
 * without modification, are permitted provided that the following
8
 * conditions are met:
9
 *
10
 * 1. Redistributions of source code must retain the above
11
 *    copyright notice, this list of conditions and the
12
 *    following disclaimer.
13
 *
14
 * 2. Redistributions in binary form must reproduce the above
15
 *    copyright notice, this list of conditions and the following
16
 *    disclaimer in the documentation and/or other materials
17
 *    provided with the distribution.
18
 *
19
 * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
20
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23
 * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
27
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
30
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31
 * SUCH DAMAGE.
32
 */
33
34
/* Classic bloom filter with several improvements
35
 * 1) Cache oblivious:
36
 *  Putze, F.; Sanders, P.; Singler, J. (2007),
37
 *  "Cache-, Hash- and Space-Efficient Bloom Filters"
38
 *  http://algo2.iti.kit.edu/singler/publications/cacheefficientbloomfilters-wea2007.pdf
39
 * 2) Fast hash function calculation:
40
 *  Kirsch, Adam; Mitzenmacher, Michael (2006)
41
 *  "Less Hashing, Same Performance: Building a Better Bloom Filter"
42
 *   https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
43
 * 3) Using only one hash value that is splitted into several independent parts
44
 */
45
46
#include <stdint.h>
47
#include <stdbool.h>
48
#include <stddef.h>
49
#include <limits.h>
50
#include "bit/bit.h"
51
52
#if defined(__cplusplus)
53
extern "C" {
54
#endif /* defined(__cplusplus) */
55
56
enum {
57
  /* Expected cache line of target processor */
58
  BLOOM_CACHE_LINE = 64,
59
};
60
61
typedef uint32_t bloom_hash_t;
62
63
/**
64
 * Cache-line-size block of bloom filter
65
 */
66
struct bloom_block {
67
  unsigned char bits[BLOOM_CACHE_LINE];
68
};
69
70
/**
71
 * Bloom filter data structure
72
 */
73
struct bloom {
74
  /* Number of buckets (blocks) in the table */
75
  uint32_t table_size;
76
  /* Number of hash function per value */
77
  uint16_t hash_count;
78
  /* Bit field table */
79
  struct bloom_block *table;
80
};
81
82
/* {{{ API declaration */
83
84
/**
85
 * Allocate and initialize an instance of bloom filter
86
 *
87
 * @param bloom - structure to initialize
88
 * @param number_of_values - estimated number of values to be added
89
 * @param false_positive_rate - desired false positive rate
90
 * @return 0 - OK, -1 - memory error
91
 */
92
int
93
bloom_create(struct bloom *bloom, uint32_t number_of_values,
94
       double false_positive_rate);
95
96
/**
97
 * Free resources of the bloom filter
98
 *
99
 * @param bloom - the bloom filter
100
 */
101
void
102
bloom_destroy(struct bloom *bloom);
103
104
/**
105
 * Add a value into the data set
106
 * @param bloom - the bloom filter
107
 * @param hash - hash of the value
108
 */
109
static void
110
bloom_add(struct bloom *bloom, bloom_hash_t hash);
111
112
/**
113
 * Query for presence of a value in the data set
114
 * @param bloom - the bloom filter
115
 * @param hash - hash of the value
116
 * @return true - the value could be in data set; false - the value is
117
 *  definitively not in data set
118
 *
119
 */
120
static bool
121
bloom_maybe_has(const struct bloom *bloom, bloom_hash_t hash);
122
123
/**
124
 * Return the expected false positive rate of a bloom filter.
125
 * @param bloom - the bloom filter
126
 * @param number_of_values - number of values stored in the filter
127
 * @return - expected false positive rate
128
 */
129
double
130
bloom_fpr(const struct bloom *bloom, uint32_t number_of_values);
131
132
/**
133
 * Calculate size of a buffer that is needed for storing bloom table
134
 * @param bloom - the bloom filter to store
135
 * @return - Exact size
136
 */
137
size_t
138
bloom_store_size(const struct bloom *bloom);
139
140
/**
141
 * Store bloom filter table to the given buffer
142
 * Other struct bloom members must be stored manually.
143
 * @param bloom - the bloom filter to store
144
 * @param table - buffer to store to
145
 * #return - end of written buffer
146
 */
147
char *
148
bloom_store(const struct bloom *bloom, char *table);
149
150
/**
151
 * Allocate table and load it from given buffer.
152
 * Other struct bloom members must be loaded manually.
153
 *
154
 * @param bloom - structure to load to
155
 * @param table - data to load
156
 * @return 0 - OK, -1 - memory error
157
 */
158
int
159
bloom_load_table(struct bloom *bloom, const char *table);
160
161
/* }}} API declaration */
162
163
/* {{{ API definition */
164
165
static inline void
166
bloom_add(struct bloom *bloom, bloom_hash_t hash)
167
0
{
168
  /* Using lower part of the has for finding a block */
169
0
  bloom_hash_t pos = hash % bloom->table_size;
170
0
  hash = hash / bloom->table_size;
171
  /* __builtin_prefetch(bloom->table + pos, 1); */
172
0
  const bloom_hash_t bloom_block_bits = BLOOM_CACHE_LINE * CHAR_BIT;
173
  /* bit_no in block is less than bloom_block_bits (512).
174
   * split the given hash into independent lower part and high part. */
175
0
  bloom_hash_t hash2 = hash / bloom_block_bits + 1;
176
0
  for (bloom_hash_t i = 0; i < bloom->hash_count; i++) {
177
0
    bloom_hash_t bit_no = hash % bloom_block_bits;
178
0
    bit_set(bloom->table[pos].bits, bit_no);
179
    /* Combine two hashes to create required number of hashes */
180
    /* Add i**2 for better distribution */
181
0
    hash += hash2 + i * i;
182
0
  }
183
0
}
Unexecuted instantiation: vy_stmt.c:bloom_add
Unexecuted instantiation: vy_run.c:bloom_add
Unexecuted instantiation: tuple_bloom.c:bloom_add
Unexecuted instantiation: bloom.c:bloom_add
184
185
static inline bool
186
bloom_maybe_has(const struct bloom *bloom, bloom_hash_t hash)
187
0
{
188
  /* Using lower part of the has for finding a block */
189
0
  bloom_hash_t pos = hash % bloom->table_size;
190
0
  hash = hash / bloom->table_size;
191
  /* __builtin_prefetch(bloom->table + pos, 0); */
192
0
  const bloom_hash_t bloom_block_bits = BLOOM_CACHE_LINE * CHAR_BIT;
193
  /* bit_no in block is less than bloom_block_bits (512).
194
   * split the given hash into independent lower part and high part. */
195
0
  bloom_hash_t hash2 = hash / bloom_block_bits + 1;
196
0
  for (bloom_hash_t i = 0; i < bloom->hash_count; i++) {
197
0
    bloom_hash_t bit_no = hash % bloom_block_bits;
198
0
    if (!bit_test(bloom->table[pos].bits, bit_no))
199
0
      return false;
200
    /* Combine two hashes to create required number of hashes */
201
    /* Add i**2 for better distribution */
202
0
    hash += hash2 + i * i;
203
0
  }
204
0
  return true;
205
0
}
Unexecuted instantiation: vy_stmt.c:bloom_maybe_has
Unexecuted instantiation: vy_run.c:bloom_maybe_has
Unexecuted instantiation: tuple_bloom.c:bloom_maybe_has
Unexecuted instantiation: bloom.c:bloom_maybe_has
206
207
/* }}} API definition */
208
209
#if defined(__cplusplus)
210
} /* extern "C" */
211
#endif /* defined(__cplusplus) */
212
213
#endif /* TARANTOOL_LIB_SALAD_BLOOM_H_INCLUDED */