/src/freeradius-server/src/lib/util/hash.c
Line | Count | Source |
1 | | /* |
2 | | * This program is free software; you can redistribute it and/or modify |
3 | | * it under the terms of the GNU General Public License as published by |
4 | | * the Free Software Foundation; either version 2 of the License, or |
5 | | * (at your option) any later version. |
6 | | * |
7 | | * This program is distributed in the hope that it will be useful, |
8 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | | * GNU General Public License for more details. |
11 | | * |
12 | | * You should have received a copy of the GNU General Public License |
13 | | * along with this program; if not, write to the Free Software |
14 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA |
15 | | */ |
16 | | |
17 | | /** Resizable hash tables |
18 | | * |
19 | | * The weird "reverse" function is based on an idea from |
20 | | * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with |
21 | | * modifications so that they're not lock-free. :( |
22 | | * |
23 | | * However, the split-order idea allows a fast & easy splitting of the |
24 | | * hash bucket chain when the hash table is resized. Without it, we'd |
25 | | * have to check & update the pointers for every node in the buck chain, |
26 | | * rather than being able to move 1/2 of the entries in the chain with |
27 | | * one update. |
28 | | * |
29 | | * @file src/lib/util/hash.c |
30 | | * |
31 | | * @copyright 2005,2006 The FreeRADIUS server project |
32 | | */ |
33 | | RCSID("$Id: 5406ac3a58d2f04b311f934782c8019129df7b4b $") |
34 | | |
35 | | #include <freeradius-devel/util/hash.h> |
36 | | |
37 | | /* |
38 | | * A reasonable number of buckets to start off with. |
39 | | * Should be a power of two. |
40 | | */ |
41 | 146k | #define FR_HASH_NUM_BUCKETS (64) |
42 | | |
43 | | struct fr_hash_entry_s { |
44 | | fr_hash_entry_t *next; |
45 | | uint32_t reversed; |
46 | | uint32_t key; |
47 | | void *data; |
48 | | }; |
49 | | |
50 | | struct fr_hash_table_s { |
51 | | uint32_t num_elements; //!< Number of elements in the hash table. |
52 | | uint32_t num_buckets; //!< Number of buckets (how long the array is) - power of 2 */ |
53 | | uint32_t next_grow; |
54 | | uint32_t mask; |
55 | | |
56 | | fr_free_t free; //!< Data free function. |
57 | | fr_hash_t hash; //!< Hashing function. |
58 | | fr_cmp_t cmp; //!< Comparison function. |
59 | | |
60 | | char const *type; //!< Talloc type to check elements against. |
61 | | |
62 | | fr_hash_entry_t null; |
63 | | fr_hash_entry_t **buckets; //!< Array of hash buckets. |
64 | | }; |
65 | | |
66 | | #ifdef TESTING |
67 | | static int grow = 0; |
68 | | #endif |
69 | | |
70 | | /* |
71 | | * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}' |
72 | | */ |
73 | | static const uint8_t reversed_byte[256] = { |
74 | | 0, 128, 64, 192, 32, 160, 96, 224, |
75 | | 16, 144, 80, 208, 48, 176, 112, 240, |
76 | | 8, 136, 72, 200, 40, 168, 104, 232, |
77 | | 24, 152, 88, 216, 56, 184, 120, 248, |
78 | | 4, 132, 68, 196, 36, 164, 100, 228, |
79 | | 20, 148, 84, 212, 52, 180, 116, 244, |
80 | | 12, 140, 76, 204, 44, 172, 108, 236, |
81 | | 28, 156, 92, 220, 60, 188, 124, 252, |
82 | | 2, 130, 66, 194, 34, 162, 98, 226, |
83 | | 18, 146, 82, 210, 50, 178, 114, 242, |
84 | | 10, 138, 74, 202, 42, 170, 106, 234, |
85 | | 26, 154, 90, 218, 58, 186, 122, 250, |
86 | | 6, 134, 70, 198, 38, 166, 102, 230, |
87 | | 22, 150, 86, 214, 54, 182, 118, 246, |
88 | | 14, 142, 78, 206, 46, 174, 110, 238, |
89 | | 30, 158, 94, 222, 62, 190, 126, 254, |
90 | | 1, 129, 65, 193, 33, 161, 97, 225, |
91 | | 17, 145, 81, 209, 49, 177, 113, 241, |
92 | | 9, 137, 73, 201, 41, 169, 105, 233, |
93 | | 25, 153, 89, 217, 57, 185, 121, 249, |
94 | | 5, 133, 69, 197, 37, 165, 101, 229, |
95 | | 21, 149, 85, 213, 53, 181, 117, 245, |
96 | | 13, 141, 77, 205, 45, 173, 109, 237, |
97 | | 29, 157, 93, 221, 61, 189, 125, 253, |
98 | | 3, 131, 67, 195, 35, 163, 99, 227, |
99 | | 19, 147, 83, 211, 51, 179, 115, 243, |
100 | | 11, 139, 75, 203, 43, 171, 107, 235, |
101 | | 27, 155, 91, 219, 59, 187, 123, 251, |
102 | | 7, 135, 71, 199, 39, 167, 103, 231, |
103 | | 23, 151, 87, 215, 55, 183, 119, 247, |
104 | | 15, 143, 79, 207, 47, 175, 111, 239, |
105 | | 31, 159, 95, 223, 63, 191, 127, 255 |
106 | | }; |
107 | | |
108 | | |
109 | | /* |
110 | | * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}' |
111 | | */ |
112 | | static uint8_t parent_byte[256] = { |
113 | | 0, 0, 0, 1, 0, 1, 2, 3, |
114 | | 0, 1, 2, 3, 4, 5, 6, 7, |
115 | | 0, 1, 2, 3, 4, 5, 6, 7, |
116 | | 8, 9, 10, 11, 12, 13, 14, 15, |
117 | | 0, 1, 2, 3, 4, 5, 6, 7, |
118 | | 8, 9, 10, 11, 12, 13, 14, 15, |
119 | | 16, 17, 18, 19, 20, 21, 22, 23, |
120 | | 24, 25, 26, 27, 28, 29, 30, 31, |
121 | | 0, 1, 2, 3, 4, 5, 6, 7, |
122 | | 8, 9, 10, 11, 12, 13, 14, 15, |
123 | | 16, 17, 18, 19, 20, 21, 22, 23, |
124 | | 24, 25, 26, 27, 28, 29, 30, 31, |
125 | | 32, 33, 34, 35, 36, 37, 38, 39, |
126 | | 40, 41, 42, 43, 44, 45, 46, 47, |
127 | | 48, 49, 50, 51, 52, 53, 54, 55, |
128 | | 56, 57, 58, 59, 60, 61, 62, 63, |
129 | | 0, 1, 2, 3, 4, 5, 6, 7, |
130 | | 8, 9, 10, 11, 12, 13, 14, 15, |
131 | | 16, 17, 18, 19, 20, 21, 22, 23, |
132 | | 24, 25, 26, 27, 28, 29, 30, 31, |
133 | | 32, 33, 34, 35, 36, 37, 38, 39, |
134 | | 40, 41, 42, 43, 44, 45, 46, 47, |
135 | | 48, 49, 50, 51, 52, 53, 54, 55, |
136 | | 56, 57, 58, 59, 60, 61, 62, 63, |
137 | | 64, 65, 66, 67, 68, 69, 70, 71, |
138 | | 72, 73, 74, 75, 76, 77, 78, 79, |
139 | | 80, 81, 82, 83, 84, 85, 86, 87, |
140 | | 88, 89, 90, 91, 92, 93, 94, 95, |
141 | | 96, 97, 98, 99, 100, 101, 102, 103, |
142 | | 104, 105, 106, 107, 108, 109, 110, 111, |
143 | | 112, 113, 114, 115, 116, 117, 118, 119, |
144 | | 120, 121, 122, 123, 124, 125, 126, 127 |
145 | | }; |
146 | | |
147 | | |
148 | | /* |
149 | | * Reverse a key. |
150 | | */ |
151 | | static uint32_t reverse(uint32_t key) |
152 | 258k | { |
153 | | /* |
154 | | * Cast to uint32_t is required because the |
155 | | * default type of of the expression is an |
156 | | * int and ubsan correctly complains that |
157 | | * the result of 0xff << 24 won't fit in a |
158 | | * signed 32bit integer. |
159 | | */ |
160 | 258k | return (((uint32_t)reversed_byte[key & 0xff] << 24) | |
161 | 258k | ((uint32_t)reversed_byte[(key >> 8) & 0xff] << 16) | |
162 | 258k | ((uint32_t)reversed_byte[(key >> 16) & 0xff] << 8) | |
163 | 258k | ((uint32_t)reversed_byte[(key >> 24) & 0xff])); |
164 | 258k | } |
165 | | |
166 | | /* |
167 | | * Take the parent by discarding the highest bit that is set. |
168 | | */ |
169 | | static uint32_t parent_of(uint32_t key) |
170 | 2.07M | { |
171 | 2.07M | if (key > 0x00ffffff) |
172 | 0 | return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24); |
173 | | |
174 | 2.07M | if (key > 0x0000ffff) |
175 | 0 | return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16); |
176 | | |
177 | 2.07M | if (key > 0x000000ff) |
178 | 1.93k | return (key & 0x000000ff) | (parent_byte[key >> 8] << 8); |
179 | | |
180 | 2.06M | return parent_byte[key]; |
181 | 2.07M | } |
182 | | |
183 | | |
184 | | static CC_NO_UBSAN(undefined) |
185 | | fr_hash_entry_t *list_find(fr_hash_table_t *ht, |
186 | | fr_hash_entry_t *head, uint32_t reversed, void const *data) |
187 | 183k | { |
188 | 183k | fr_hash_entry_t *cur; |
189 | | |
190 | 267k | for (cur = head; cur != &ht->null; cur = cur->next) { |
191 | 182k | if (cur->reversed == reversed) { |
192 | 69.9k | if (ht->cmp) { |
193 | 69.9k | int cmp = ht->cmp(data, cur->data); |
194 | 69.9k | if (cmp > 0) break; |
195 | 69.9k | if (cmp < 0) continue; |
196 | 69.9k | } |
197 | 69.8k | return cur; |
198 | 69.9k | } |
199 | 112k | if (cur->reversed > reversed) break; |
200 | 112k | } |
201 | | |
202 | 113k | return NULL; |
203 | 183k | } |
204 | | |
205 | | |
206 | | /* |
207 | | * Inserts a new entry into the list, in order. |
208 | | */ |
209 | | static CC_NO_UBSAN(undefined) |
210 | | bool list_insert(fr_hash_table_t *ht, |
211 | | fr_hash_entry_t **head, fr_hash_entry_t *node) |
212 | 75.5k | { |
213 | 75.5k | fr_hash_entry_t **last, *cur; |
214 | | |
215 | 75.5k | last = head; |
216 | | |
217 | 100k | for (cur = *head; cur != &ht->null; last = &(cur->next), cur = cur->next) { |
218 | 42.7k | if (cur->reversed > node->reversed) break; |
219 | | |
220 | 24.8k | if (cur->reversed == node->reversed) { |
221 | 194 | if (ht->cmp) { |
222 | 194 | int8_t cmp = ht->cmp(node->data, cur->data); |
223 | 194 | if (cmp > 0) break; |
224 | 194 | if (cmp < 0) continue; |
225 | 194 | } |
226 | 194 | return false; |
227 | 194 | } |
228 | 24.8k | } |
229 | | |
230 | 75.3k | node->next = *last; |
231 | 75.3k | *last = node; |
232 | | |
233 | 75.3k | return true; |
234 | 75.5k | } |
235 | | |
236 | | |
237 | | /* |
238 | | * Delete an entry from the list. |
239 | | */ |
240 | | static void list_delete(fr_hash_table_t *ht, |
241 | | fr_hash_entry_t **head, fr_hash_entry_t *node) |
242 | 36 | { |
243 | 36 | fr_hash_entry_t **last, *cur; |
244 | | |
245 | 36 | last = head; |
246 | | |
247 | 36 | for (cur = *head; cur != &ht->null; cur = cur->next) { |
248 | 36 | if (cur == node) break; |
249 | 0 | last = &(cur->next); |
250 | 0 | } |
251 | | |
252 | 36 | *last = node->next; |
253 | 36 | } |
254 | | |
255 | | static int _fr_hash_table_free(fr_hash_table_t *ht) |
256 | 36.4k | { |
257 | 36.4k | uint32_t i; |
258 | 36.4k | fr_hash_entry_t *node, *next; |
259 | | |
260 | 36.4k | if (ht->free) { |
261 | 152k | for (i = 0; i < ht->num_buckets; i++) { |
262 | 150k | if (ht->buckets[i]) for (node = ht->buckets[i]; |
263 | 56.1k | node != &ht->null; |
264 | 33.7k | node = next) { |
265 | 22.4k | next = node->next; |
266 | 22.4k | if (!node->data) continue; /* dummy entry */ |
267 | | |
268 | 22.4k | ht->free(node->data); |
269 | 22.4k | } |
270 | 150k | } |
271 | 2.30k | } |
272 | | |
273 | 36.4k | return 0; |
274 | 36.4k | } |
275 | | |
276 | | /* |
277 | | * Create the table. |
278 | | * |
279 | | * Memory usage in bytes is (20/3) * number of entries. |
280 | | */ |
281 | | fr_hash_table_t *_fr_hash_table_alloc(TALLOC_CTX *ctx, |
282 | | char const *type, |
283 | | fr_hash_t hash_func, |
284 | | fr_cmp_t cmp_func, |
285 | | fr_free_t free_func) |
286 | 36.5k | { |
287 | 36.5k | fr_hash_table_t *ht; |
288 | | |
289 | 36.5k | ht = talloc(ctx, fr_hash_table_t); |
290 | 36.5k | if (!ht) return NULL; |
291 | 36.5k | talloc_set_destructor(ht, _fr_hash_table_free); |
292 | | |
293 | 36.5k | *ht = (fr_hash_table_t){ |
294 | 36.5k | .type = type, |
295 | 36.5k | .free = free_func, |
296 | 36.5k | .hash = hash_func, |
297 | 36.5k | .cmp = cmp_func, |
298 | 36.5k | .num_buckets = FR_HASH_NUM_BUCKETS, |
299 | 36.5k | .mask = FR_HASH_NUM_BUCKETS - 1, |
300 | | |
301 | | /* |
302 | | * Have a default load factor of 2.5. In practice this |
303 | | * means that the average load will hit 3 before the |
304 | | * table grows. |
305 | | */ |
306 | 36.5k | .next_grow = (FR_HASH_NUM_BUCKETS << 1) + (FR_HASH_NUM_BUCKETS >> 1), |
307 | 36.5k | .buckets = talloc_zero_array(ht, fr_hash_entry_t *, FR_HASH_NUM_BUCKETS) |
308 | 36.5k | }; |
309 | 36.5k | if (unlikely(!ht->buckets)) { |
310 | 0 | talloc_free(ht); |
311 | 0 | return NULL; |
312 | 0 | } |
313 | | |
314 | 36.5k | ht->null.reversed = ~0; |
315 | 36.5k | ht->null.key = ~0; |
316 | 36.5k | ht->null.next = &ht->null; |
317 | 36.5k | ht->buckets[0] = &ht->null; |
318 | | |
319 | 36.5k | return ht; |
320 | 36.5k | } |
321 | | |
322 | | |
323 | | /* |
324 | | * If the current bucket is uninitialized, initialize it |
325 | | * by recursively copying information from the parent. |
326 | | * |
327 | | * We may have a situation where entry E is a parent to 2 other |
328 | | * entries E' and E". If we split E into E and E', then the |
329 | | * nodes meant for E" end up in E or E', either of which is |
330 | | * wrong. To solve that problem, we walk down the whole chain, |
331 | | * inserting the elements into the correct place. |
332 | | */ |
333 | | static void fr_hash_table_fixup(fr_hash_table_t *ht, uint32_t entry) |
334 | 2.07M | { |
335 | 2.07M | uint32_t parent_entry; |
336 | 2.07M | fr_hash_entry_t **last, *cur; |
337 | 2.07M | uint32_t this; |
338 | | |
339 | 2.07M | parent_entry = parent_of(entry); |
340 | | |
341 | | /* parent_entry == entry if and only if entry == 0 */ |
342 | | |
343 | 2.07M | if (!ht->buckets[parent_entry]) { |
344 | 1.01M | fr_hash_table_fixup(ht, parent_entry); |
345 | 1.01M | } |
346 | | |
347 | | /* |
348 | | * Keep walking down cur, trying to find entries that |
349 | | * don't belong here any more. There may be multiple |
350 | | * ones, so we can't have a naive algorithm... |
351 | | */ |
352 | 2.07M | last = &ht->buckets[parent_entry]; |
353 | 2.07M | this = parent_entry; |
354 | | |
355 | 2.10M | for (cur = *last; cur != &ht->null; cur = cur->next) { |
356 | 32.5k | uint32_t real_entry; |
357 | | |
358 | 32.5k | real_entry = cur->key & ht->mask; |
359 | 32.5k | if (real_entry != this) { /* ht->buckets[real_entry] == NULL */ |
360 | 7.88k | *last = &ht->null; |
361 | 7.88k | ht->buckets[real_entry] = cur; |
362 | 7.88k | this = real_entry; |
363 | 7.88k | } |
364 | | |
365 | 32.5k | last = &(cur->next); |
366 | 32.5k | } |
367 | | |
368 | | /* |
369 | | * We may NOT have initialized this bucket, so do it now. |
370 | | */ |
371 | 2.07M | if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null; |
372 | 2.07M | } |
373 | | |
374 | | /* |
375 | | * This should be a power of two. Changing it to 4 doesn't seem |
376 | | * to make any difference. |
377 | | */ |
378 | 236 | #define GROW_FACTOR (2) |
379 | | |
380 | | /* |
381 | | * Grow the hash table. |
382 | | */ |
383 | | static void fr_hash_table_grow(fr_hash_table_t *ht) |
384 | 118 | { |
385 | 118 | fr_hash_entry_t **buckets; |
386 | 118 | size_t existing = talloc_get_size(ht->buckets); |
387 | | |
388 | 118 | buckets = talloc_realloc(ht, ht->buckets, fr_hash_entry_t *, GROW_FACTOR * ht->num_buckets); |
389 | 118 | if (!buckets) return; |
390 | | |
391 | 118 | memset(((uint8_t *)buckets) + existing, 0, talloc_get_size(buckets) - existing); |
392 | | |
393 | 118 | ht->buckets = buckets; |
394 | 118 | ht->num_buckets *= GROW_FACTOR; |
395 | 118 | ht->next_grow *= GROW_FACTOR; |
396 | 118 | ht->mask = ht->num_buckets - 1; |
397 | | #ifdef TESTING |
398 | | grow = 1; |
399 | | fprintf(stderr, "GROW TO %d\n", ht->num_buckets); |
400 | | #endif |
401 | 118 | } |
402 | | |
403 | | /* |
404 | | * Internal find a node routine. |
405 | | */ |
406 | | static inline CC_HINT(always_inline) fr_hash_entry_t *hash_table_find(fr_hash_table_t *ht, |
407 | | uint32_t key, void const *data) |
408 | 183k | { |
409 | 183k | uint32_t entry; |
410 | 183k | uint32_t reversed; |
411 | | |
412 | 183k | entry = key & ht->mask; |
413 | 183k | reversed = reverse(key); |
414 | | |
415 | 183k | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
416 | | |
417 | 183k | return list_find(ht, ht->buckets[entry], reversed, data); |
418 | 183k | } |
419 | | |
420 | | /** Find data in a hash table |
421 | | * |
422 | | * @param[in] ht to find data in. |
423 | | * @param[in] data to find. Will be passed to the |
424 | | * hashing function. |
425 | | * @return |
426 | | * - The user data we found. |
427 | | * - NULL if we couldn't find any matching data. |
428 | | */ |
429 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
430 | | void *fr_hash_table_find(fr_hash_table_t *ht, void const *data) |
431 | 161k | { |
432 | 161k | fr_hash_entry_t *node; |
433 | | |
434 | 161k | node = hash_table_find(ht, ht->hash(data), data); |
435 | 161k | if (!node) return NULL; |
436 | | |
437 | 69.6k | return UNCONST(void *, node->data); |
438 | 161k | } |
439 | | |
440 | | /** Hash table lookup with pre-computed key |
441 | | * |
442 | | * @param[in] ht to find data in. |
443 | | * @param[in] key the precomputed key. |
444 | | * @param[in] data for list matching. |
445 | | * @return |
446 | | * - The user data we found. |
447 | | * - NULL if we couldn't find any matching data. |
448 | | */ |
449 | | void *fr_hash_table_find_by_key(fr_hash_table_t *ht, uint32_t key, void const *data) |
450 | 0 | { |
451 | 0 | fr_hash_entry_t *node; |
452 | |
|
453 | 0 | node = hash_table_find(ht, key, data); |
454 | 0 | if (!node) return NULL; |
455 | | |
456 | 0 | return UNCONST(void *, node->data); |
457 | 0 | } |
458 | | |
459 | | /** Insert data into a hash table |
460 | | * |
461 | | * @param[in] ht to insert data into. |
462 | | * @param[in] data to insert. Will be passed to the |
463 | | * hashing function. |
464 | | * @return |
465 | | * - true if data was inserted. |
466 | | * - false if data already existed and was not inserted. |
467 | | */ |
468 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
469 | | bool fr_hash_table_insert(fr_hash_table_t *ht, void const *data) |
470 | 75.5k | { |
471 | 75.5k | uint32_t key; |
472 | 75.5k | uint32_t entry; |
473 | 75.5k | uint32_t reversed; |
474 | 75.5k | fr_hash_entry_t *node; |
475 | | |
476 | 75.5k | #ifndef TALLOC_GET_TYPE_ABORT_NOOP |
477 | 75.5k | if (ht->type) (void)_talloc_get_type_abort(data, ht->type, __location__); |
478 | 75.5k | #endif |
479 | | |
480 | 75.5k | key = ht->hash(data); |
481 | 75.5k | entry = key & ht->mask; |
482 | 75.5k | reversed = reverse(key); |
483 | | |
484 | 75.5k | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
485 | | |
486 | | /* |
487 | | * If we try to do our own memory allocation here, the |
488 | | * speedup is only ~15% or so, which isn't worth it. |
489 | | */ |
490 | 75.5k | node = talloc_zero(ht, fr_hash_entry_t); |
491 | 75.5k | if (unlikely(!node)) return false; |
492 | | |
493 | 75.5k | node->next = &ht->null; |
494 | 75.5k | node->reversed = reversed; |
495 | 75.5k | node->key = key; |
496 | 75.5k | node->data = UNCONST(void *, data); |
497 | | |
498 | | /* already in the table, can't insert it */ |
499 | 75.5k | if (!list_insert(ht, &ht->buckets[entry], node)) { |
500 | 194 | talloc_free(node); |
501 | 194 | return false; |
502 | 194 | } |
503 | | |
504 | | /* |
505 | | * Check the load factor, and grow the table if |
506 | | * necessary. |
507 | | */ |
508 | 75.3k | ht->num_elements++; |
509 | 75.3k | if (ht->num_elements >= ht->next_grow) fr_hash_table_grow(ht); |
510 | | |
511 | 75.3k | return true; |
512 | 75.5k | } |
513 | | |
514 | | /** Replace old data with new data, OR insert if there is no old |
515 | | * |
516 | | * @param[out] old data that was replaced. If this argument |
517 | | * is not NULL, then the old data will not |
518 | | * be freed, even if a free function is |
519 | | * configured. |
520 | | * @param[in] ht to insert data into. |
521 | | * @param[in] data to replace. Will be passed to the |
522 | | * hashing function. |
523 | | * @return |
524 | | * - 1 if data was inserted (hash table grows) |
525 | | * - 0 if data was replaced (hash table doesn't grow) |
526 | | * - -1 if we failed to replace data |
527 | | */ |
528 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
529 | | int fr_hash_table_replace(void **old, fr_hash_table_t *ht, void const *data) |
530 | 21.6k | { |
531 | 21.6k | fr_hash_entry_t *node; |
532 | | |
533 | 21.6k | node = hash_table_find(ht, ht->hash(data), data); |
534 | 21.6k | if (!node) { |
535 | 21.4k | if (old) *old = NULL; |
536 | 21.4k | return fr_hash_table_insert(ht, data) ? 1 : -1; |
537 | 21.4k | } |
538 | | |
539 | 196 | if (old) { |
540 | 0 | *old = node->data; |
541 | 196 | } else if (ht->free) { |
542 | 0 | ht->free(node->data); |
543 | 0 | } |
544 | | |
545 | 196 | node->data = UNCONST(void *, data); |
546 | | |
547 | 196 | return 0; |
548 | 21.6k | } |
549 | | |
550 | | /** Remove an entry from the hash table, without freeing the data |
551 | | * |
552 | | * @param[in] ht to remove data from. |
553 | | * @param[in] data to remove. Will be passed to the |
554 | | * hashing function. |
555 | | * @return |
556 | | * - The user data we removed. |
557 | | * - NULL if we couldn't find any matching data. |
558 | | */ |
559 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
560 | | void *fr_hash_table_remove(fr_hash_table_t *ht, void const *data) |
561 | 36 | { |
562 | 36 | uint32_t key; |
563 | 36 | uint32_t entry; |
564 | 36 | uint32_t reversed; |
565 | 36 | void *old; |
566 | 36 | fr_hash_entry_t *node; |
567 | | |
568 | 36 | key = ht->hash(data); |
569 | 36 | entry = key & ht->mask; |
570 | 36 | reversed = reverse(key); |
571 | | |
572 | 36 | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
573 | | |
574 | 36 | node = list_find(ht, ht->buckets[entry], reversed, data); |
575 | 36 | if (!node) return NULL; |
576 | | |
577 | 36 | list_delete(ht, &ht->buckets[entry], node); |
578 | 36 | ht->num_elements--; |
579 | | |
580 | 36 | old = node->data; |
581 | 36 | talloc_free(node); |
582 | | |
583 | 36 | return old; |
584 | 36 | } |
585 | | |
586 | | /** Remove and free data (if a free function was specified) |
587 | | * |
588 | | * @param[in] ht to remove data from. |
589 | | * @param[in] data to remove/free. |
590 | | * @return |
591 | | * - true if we removed data. |
592 | | * - false if we couldn't find any matching data. |
593 | | */ |
594 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
595 | | bool fr_hash_table_delete(fr_hash_table_t *ht, void const *data) |
596 | 36 | { |
597 | 36 | void *old; |
598 | | |
599 | 36 | old = fr_hash_table_remove(ht, data); |
600 | 36 | if (!old) return false; |
601 | | |
602 | 36 | if (ht->free) ht->free(old); |
603 | | |
604 | 36 | return true; |
605 | 36 | } |
606 | | |
607 | | /* |
608 | | * Count number of elements |
609 | | */ |
610 | | CC_NO_UBSAN(function) /* UBSAN: false positive - htrie call with first argument of void * trips --fsanitize=function */ |
611 | | uint32_t fr_hash_table_num_elements(fr_hash_table_t *ht) |
612 | 18 | { |
613 | 18 | return ht->num_elements; |
614 | 18 | } |
615 | | |
616 | | /** Iterate over entries in a hash table |
617 | | * |
618 | | * @note If the hash table is modified the iterator should be considered invalidated. |
619 | | * |
620 | | * @param[in] ht to iterate over. |
621 | | * @param[in] iter Pointer to an iterator struct, used to maintain |
622 | | * state between calls. |
623 | | * @return |
624 | | * - User data. |
625 | | * - NULL if at the end of the list. |
626 | | */ |
627 | | void *fr_hash_table_iter_next(fr_hash_table_t *ht, fr_hash_iter_t *iter) |
628 | 123M | { |
629 | 123M | fr_hash_entry_t *node; |
630 | 123M | uint32_t i; |
631 | | |
632 | | /* |
633 | | * Return the next element in the bucket. |
634 | | */ |
635 | 123M | if (iter->next != &ht->null) { |
636 | 52.3M | node = iter->next; |
637 | 52.3M | iter->next = node->next; |
638 | | |
639 | 52.3M | return node->data; |
640 | 52.3M | } |
641 | | |
642 | | /* |
643 | | * We've wrapped around to bucket 0 again. That means we're done. |
644 | | */ |
645 | 70.6M | if (iter->bucket == 0) return NULL; |
646 | | |
647 | | /* |
648 | | * We might have to go through multiple empty |
649 | | * buckets to find one that contains something |
650 | | * we should return |
651 | | */ |
652 | 70.3M | i = iter->bucket - 1; |
653 | 240M | for (;;) { |
654 | 240M | if (!ht->buckets[i]) fr_hash_table_fixup(ht, i); |
655 | | |
656 | 240M | node = ht->buckets[i]; |
657 | 240M | if (node == &ht->null) { |
658 | 172M | if (i == 0) break; |
659 | 170M | i--; |
660 | 170M | continue; /* This bucket was empty too... */ |
661 | 172M | } |
662 | | |
663 | 67.6M | iter->next = node->next; /* Store the next one to examine */ |
664 | 67.6M | iter->bucket = i; |
665 | 67.6M | return node->data; |
666 | 240M | } |
667 | 2.74M | iter->bucket = i; |
668 | | |
669 | 2.74M | return NULL; |
670 | 70.3M | } |
671 | | |
672 | | /** Initialise an iterator |
673 | | * |
674 | | * @note If the hash table is modified the iterator should be considered invalidated. |
675 | | * |
676 | | * @param[in] ht to iterate over. |
677 | | * @param[out] iter to initialise. |
678 | | * @return |
679 | | * - The first entry in the hash table. |
680 | | * - NULL if the hash table is empty. |
681 | | */ |
682 | | void *fr_hash_table_iter_init(fr_hash_table_t *ht, fr_hash_iter_t *iter) |
683 | 3.07M | { |
684 | 3.07M | iter->bucket = ht->num_buckets; |
685 | 3.07M | iter->next = &ht->null; |
686 | | |
687 | 3.07M | return fr_hash_table_iter_next(ht, iter); |
688 | 3.07M | } |
689 | | |
690 | | /** Copy all entries out of a hash table into an array |
691 | | * |
692 | | * @param[in] ctx to allocate array in. |
693 | | * @param[in] out array of hash table entries. |
694 | | * @param[in] ht to flatter. |
695 | | * @return |
696 | | * - 0 on success. |
697 | | * - -1 on failure. |
698 | | */ |
699 | | int fr_hash_table_flatten(TALLOC_CTX *ctx, void **out[], fr_hash_table_t *ht) |
700 | 18 | { |
701 | 18 | uint64_t num = fr_hash_table_num_elements(ht), i; |
702 | 18 | fr_hash_iter_t iter; |
703 | 18 | void *item, **list; |
704 | | |
705 | 18 | if (unlikely(!(list = talloc_array(ctx, void *, num)))) return -1; |
706 | | |
707 | 18 | for (item = fr_hash_table_iter_init(ht, &iter), i = 0; |
708 | 22 | item; |
709 | 18 | item = fr_hash_table_iter_next(ht, &iter), i++) list[i] = item; |
710 | | |
711 | 18 | *out = list; |
712 | | |
713 | 18 | return 0; |
714 | 18 | } |
715 | | |
716 | | /** Ensure all buckets are filled |
717 | | * |
718 | | * This must be called if the table will be read by multiple threads without |
719 | | * synchronisation. Synchronisation is still required for updates. |
720 | | * |
721 | | * @param[in] ht to fill. |
722 | | */ |
723 | | void fr_hash_table_fill(fr_hash_table_t *ht) |
724 | 0 | { |
725 | 0 | int i; |
726 | |
|
727 | 0 | for (i = ht->num_buckets - 1; i >= 0; i--) if (!ht->buckets[i]) fr_hash_table_fixup(ht, i); |
728 | 0 | } |
729 | | |
730 | | #ifdef TESTING |
731 | | /* |
732 | | * Show what the hash table is doing. |
733 | | */ |
734 | | int fr_hash_table_info(fr_hash_table_t *ht) |
735 | | { |
736 | | int i, a, collisions, uninitialized; |
737 | | int array[256]; |
738 | | |
739 | | if (!ht) return 0; |
740 | | |
741 | | uninitialized = collisions = 0; |
742 | | memset(array, 0, sizeof(array)); |
743 | | |
744 | | for (i = 0; i < ht->num_buckets; i++) { |
745 | | uint32_t key; |
746 | | int load; |
747 | | fr_hash_entry_t *node, *next; |
748 | | |
749 | | /* |
750 | | * If we haven't inserted or looked up an entry |
751 | | * in a bucket, it's uninitialized. |
752 | | */ |
753 | | if (!ht->buckets[i]) { |
754 | | uninitialized++; |
755 | | continue; |
756 | | } |
757 | | |
758 | | load = 0; |
759 | | key = ~0; |
760 | | for (node = ht->buckets[i]; node != &ht->null; node = next) { |
761 | | if (node->reversed == key) { |
762 | | collisions++; |
763 | | } else { |
764 | | key = node->reversed; |
765 | | } |
766 | | next = node->next; |
767 | | load++; |
768 | | } |
769 | | |
770 | | if (load > 255) load = 255; |
771 | | array[load]++; |
772 | | } |
773 | | |
774 | | printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht, |
775 | | ht->num_buckets, uninitialized); |
776 | | printf("\tnum entries %d\thash collisions %d\n", |
777 | | ht->num_elements, collisions); |
778 | | |
779 | | a = 0; |
780 | | for (i = 1; i < 256; i++) { |
781 | | if (!array[i]) continue; |
782 | | printf("%d\t%d\n", i, array[i]); |
783 | | |
784 | | /* |
785 | | * Since the entries are ordered, the lookup cost |
786 | | * for any one element in a chain is (on average) |
787 | | * the cost of walking half of the chain. |
788 | | */ |
789 | | if (i > 1) { |
790 | | a += array[i] * i; |
791 | | } |
792 | | } |
793 | | a /= 2; |
794 | | a += array[1]; |
795 | | |
796 | | printf("\texpected lookup cost = %d/%d or %f\n\n", |
797 | | ht->num_elements, a, |
798 | | (float) ht->num_elements / (float) a); |
799 | | |
800 | | return 0; |
801 | | } |
802 | | #endif |
803 | | |
804 | | |
805 | 60.4k | #define FNV_MAGIC_INIT (0x811c9dc5) |
806 | 208k | #define FNV_MAGIC_PRIME (0x01000193) |
807 | | |
808 | | /* |
809 | | * A fast hash function. For details, see: |
810 | | * |
811 | | * http://www.isthe.com/chongo/tech/comp/fnv/ |
812 | | * |
813 | | * Which also includes public domain source. We've re-written |
814 | | * it here for our purposes. |
815 | | */ |
816 | | uint32_t fr_hash(void const *data, size_t size) |
817 | 60.4k | { |
818 | 60.4k | uint8_t const *p = data; |
819 | 60.4k | uint8_t const *q = p + size; |
820 | 60.4k | uint32_t hash = FNV_MAGIC_INIT; |
821 | | |
822 | | /* |
823 | | * FNV-1 hash each octet in the buffer |
824 | | */ |
825 | 269k | while (p != q) { |
826 | | /* |
827 | | * XOR the 8-bit quantity into the bottom of |
828 | | * the hash. |
829 | | */ |
830 | 208k | hash ^= (uint32_t) (*p++); |
831 | | |
832 | | /* |
833 | | * Multiple by 32-bit magic FNV prime, mod 2^32 |
834 | | */ |
835 | 208k | hash *= FNV_MAGIC_PRIME; |
836 | | #if 0 |
837 | | /* |
838 | | * Potential optimization. |
839 | | */ |
840 | | hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24); |
841 | | #endif |
842 | 208k | } |
843 | | |
844 | 60.4k | return hash; |
845 | 60.4k | } |
846 | | |
847 | | /* |
848 | | * Continue hashing data. |
849 | | */ |
850 | | uint32_t fr_hash_update(void const *data, size_t size, uint32_t hash) |
851 | 0 | { |
852 | 0 | uint8_t const *p = data; |
853 | 0 | uint8_t const *q; |
854 | |
|
855 | 0 | if (size == 0) return hash; /* Avoid ubsan issues with access NULL pointer */ |
856 | | |
857 | 0 | q = p + size; |
858 | 0 | while (p < q) { |
859 | 0 | hash ^= (uint32_t) (*p++); |
860 | 0 | hash *= FNV_MAGIC_PRIME; |
861 | 0 | } |
862 | |
|
863 | 0 | return hash; |
864 | 0 | } |
865 | | |
866 | | /* |
867 | | * Hash a C string, so we loop over it once. |
868 | | */ |
869 | | uint32_t fr_hash_string(char const *p) |
870 | 0 | { |
871 | 0 | uint32_t hash = FNV_MAGIC_INIT; |
872 | |
|
873 | 0 | while (*p) { |
874 | 0 | hash ^= (uint32_t) (*p++); |
875 | | /* coverity[overflow_const] */ |
876 | 0 | hash *= FNV_MAGIC_PRIME; |
877 | 0 | } |
878 | |
|
879 | 0 | return hash; |
880 | 0 | } |
881 | | |
882 | | /** Hash a C string, converting all chars to lowercase |
883 | | * |
884 | | */ |
885 | | uint32_t fr_hash_case_string(char const *p) |
886 | 0 | { |
887 | 0 | uint32_t hash = FNV_MAGIC_INIT; |
888 | |
|
889 | 0 | while (*p) { |
890 | 0 | hash ^= (uint32_t) (tolower((uint8_t) *p++)); |
891 | | /* coverity[overflow_const] */ |
892 | 0 | hash *= FNV_MAGIC_PRIME; |
893 | 0 | } |
894 | |
|
895 | 0 | return hash; |
896 | 0 | } |
897 | | |
898 | | /* |
899 | | * 64-bit variants of the above functions/ |
900 | | */ |
901 | | #undef FNV_MAGIC_INIT |
902 | | #undef FNV_MAGIC_PRIME |
903 | 0 | #define FNV_MAGIC_INIT ((uint64_t) 0xcbf29ce484222325) |
904 | 0 | #define FNV_MAGIC_PRIME ((uint64_t) 0x01000193) |
905 | | |
906 | | /* |
907 | | * A 64-bit version of the above hash |
908 | | */ |
909 | | uint64_t fr_hash64(void const *data, size_t size) |
910 | 0 | { |
911 | 0 | uint8_t const *p = data; |
912 | 0 | uint8_t const *q = p + size; |
913 | 0 | uint64_t hash = FNV_MAGIC_INIT; |
914 | | |
915 | | /* |
916 | | * FNV-1 hash each octet in the buffer |
917 | | */ |
918 | 0 | while (p != q) { |
919 | | /* |
920 | | * XOR the 8-bit quantity into the bottom of |
921 | | * the hash. |
922 | | */ |
923 | 0 | hash ^= (uint32_t) (*p++); |
924 | | |
925 | | /* |
926 | | * Multiple by 32-bit magic FNV prime, mod 2^32 |
927 | | */ |
928 | 0 | hash *= FNV_MAGIC_PRIME; |
929 | 0 | } |
930 | |
|
931 | 0 | return hash; |
932 | 0 | } |
933 | | |
934 | | /* |
935 | | * Continue hashing data. |
936 | | */ |
937 | | uint64_t fr_hash64_update(void const *data, size_t size, uint64_t hash) |
938 | 0 | { |
939 | 0 | uint8_t const *p = data; |
940 | 0 | uint8_t const *q; |
941 | |
|
942 | 0 | if (size == 0) return hash; /* Avoid ubsan issues with access NULL pointer */ |
943 | | |
944 | 0 | q = p + size; |
945 | 0 | while (p < q) { |
946 | 0 | hash ^= (uint64_t) (*p++); |
947 | 0 | hash *= FNV_MAGIC_PRIME; |
948 | 0 | } |
949 | |
|
950 | 0 | return hash; |
951 | 0 | } |
952 | | |
953 | | /** Check hash table is sane |
954 | | * |
955 | | */ |
956 | | void fr_hash_table_verify(fr_hash_table_t *ht) |
957 | 3.07M | { |
958 | 3.07M | fr_hash_iter_t iter; |
959 | 3.07M | void *ptr; |
960 | | |
961 | 3.07M | (void)talloc_get_type_abort(ht, fr_hash_table_t); |
962 | 3.07M | (void)talloc_get_type_abort(ht->buckets, fr_hash_entry_t *); |
963 | | |
964 | 3.07M | fr_assert(talloc_array_length(ht->buckets) == ht->num_buckets); |
965 | | |
966 | | /* |
967 | | * Check talloc headers on all data |
968 | | */ |
969 | 3.07M | if (ht->type) { |
970 | 3.07M | for (ptr = fr_hash_table_iter_init(ht, &iter); |
971 | 123M | ptr; |
972 | 119M | ptr = fr_hash_table_iter_next(ht, &iter)) { |
973 | | (void)_talloc_get_type_abort(ptr, ht->type, __location__); |
974 | 119M | } |
975 | 3.07M | } |
976 | 3.07M | } |
977 | | |
978 | | #ifdef TESTING |
979 | | /* |
980 | | * cc -g -DTESTING -I ../include hash.c -o hash |
981 | | * |
982 | | * ./hash |
983 | | */ |
984 | | static uint32_t hash_int(void const *data) |
985 | | { |
986 | | return fr_hash((int *) data, sizeof(int)); |
987 | | } |
988 | | |
989 | | #define MAX 1024*1024 |
990 | | int main(int argc, char **argv) |
991 | | { |
992 | | int i, *p, *q, k; |
993 | | fr_hash_table_t *ht; |
994 | | int *array; |
995 | | |
996 | | ht = fr_hash_table_alloc(NULL, hash_int, NULL, NULL); |
997 | | if (!ht) { |
998 | | fprintf(stderr, "Hash create failed\n"); |
999 | | fr_exit(1); |
1000 | | } |
1001 | | |
1002 | | array = talloc_zero_array(NULL, int, MAX); |
1003 | | if (!array) fr_exit(1); |
1004 | | |
1005 | | for (i = 0; i < MAX; i++) { |
1006 | | p = array + i; |
1007 | | *p = i; |
1008 | | |
1009 | | if (!fr_hash_table_insert(ht, p)) { |
1010 | | fprintf(stderr, "Failed insert %08x\n", i); |
1011 | | fr_exit(1); |
1012 | | } |
1013 | | #ifdef TEST_INSERT |
1014 | | q = fr_hash_table_find(ht, p); |
1015 | | if (q != p) { |
1016 | | fprintf(stderr, "Bad data %d\n", i); |
1017 | | fr_exit(1); |
1018 | | } |
1019 | | #endif |
1020 | | } |
1021 | | |
1022 | | fr_hash_table_info(ht); |
1023 | | |
1024 | | /* |
1025 | | * Build this to see how lookups result in shortening |
1026 | | * of the hash chains. |
1027 | | */ |
1028 | | if (1) { |
1029 | | for (i = 0; i < MAX ; i++) { |
1030 | | q = fr_hash_table_find(ht, &i); |
1031 | | if (!q || *q != i) { |
1032 | | fprintf(stderr, "Failed finding %d\n", i); |
1033 | | fr_exit(1); |
1034 | | } |
1035 | | |
1036 | | #if 0 |
1037 | | if (!fr_hash_table_delete(ht, &i)) { |
1038 | | fprintf(stderr, "Failed deleting %d\n", i); |
1039 | | fr_exit(1); |
1040 | | } |
1041 | | q = fr_hash_table_find(ht, &i); |
1042 | | if (q) { |
1043 | | fprintf(stderr, "Failed to delete %08x\n", i); |
1044 | | fr_exit(1); |
1045 | | } |
1046 | | #endif |
1047 | | } |
1048 | | |
1049 | | fr_hash_table_info(ht); |
1050 | | } |
1051 | | |
1052 | | fr_hash_table_free(ht); |
1053 | | talloc_free(array); |
1054 | | |
1055 | | return EXIT_SUCCESS; |
1056 | | } |
1057 | | #endif |