/src/freeradius-server/src/lib/util/hash.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This program is free software; you can redistribute it and/or modify |
3 | | * it under the terms of the GNU General Public License as published by |
4 | | * the Free Software Foundation; either version 2 of the License, or |
5 | | * (at your option) any later version. |
6 | | * |
7 | | * This program is distributed in the hope that it will be useful, |
8 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | | * GNU General Public License for more details. |
11 | | * |
12 | | * You should have received a copy of the GNU General Public License |
13 | | * along with this program; if not, write to the Free Software |
14 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA |
15 | | */ |
16 | | |
17 | | /** Resizable hash tables |
18 | | * |
19 | | * The weird "reverse" function is based on an idea from |
20 | | * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with |
21 | | * modifications so that they're not lock-free. :( |
22 | | * |
23 | | * However, the split-order idea allows a fast & easy splitting of the |
24 | | * hash bucket chain when the hash table is resized. Without it, we'd |
25 | | * have to check & update the pointers for every node in the buck chain, |
26 | | * rather than being able to move 1/2 of the entries in the chain with |
27 | | * one update. |
28 | | * |
29 | | * @file src/lib/util/hash.c |
30 | | * |
31 | | * @copyright 2005,2006 The FreeRADIUS server project |
32 | | */ |
33 | | RCSID("$Id: 6ceeaa4f74ea379d554b8766ffd61b568af676d3 $") |
34 | | |
35 | | #include <freeradius-devel/util/hash.h> |
36 | | |
37 | | /* |
38 | | * A reasonable number of buckets to start off with. |
39 | | * Should be a power of two. |
40 | | */ |
41 | 270k | #define FR_HASH_NUM_BUCKETS (64) |
42 | | |
43 | | struct fr_hash_entry_s { |
44 | | fr_hash_entry_t *next; |
45 | | uint32_t reversed; |
46 | | uint32_t key; |
47 | | void *data; |
48 | | }; |
49 | | |
50 | | struct fr_hash_table_s { |
51 | | uint32_t num_elements; //!< Number of elements in the hash table. |
52 | | uint32_t num_buckets; //!< Number of buckets (how long the array is) - power of 2 */ |
53 | | uint32_t next_grow; |
54 | | uint32_t mask; |
55 | | |
56 | | fr_free_t free; //!< Data free function. |
57 | | fr_hash_t hash; //!< Hashing function. |
58 | | fr_cmp_t cmp; //!< Comparison function. |
59 | | |
60 | | char const *type; //!< Talloc type to check elements against. |
61 | | |
62 | | fr_hash_entry_t null; |
63 | | fr_hash_entry_t **buckets; //!< Array of hash buckets. |
64 | | }; |
65 | | |
66 | | #ifdef TESTING |
67 | | static int grow = 0; |
68 | | #endif |
69 | | |
70 | | /* |
71 | | * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}' |
72 | | */ |
73 | | static const uint8_t reversed_byte[256] = { |
74 | | 0, 128, 64, 192, 32, 160, 96, 224, |
75 | | 16, 144, 80, 208, 48, 176, 112, 240, |
76 | | 8, 136, 72, 200, 40, 168, 104, 232, |
77 | | 24, 152, 88, 216, 56, 184, 120, 248, |
78 | | 4, 132, 68, 196, 36, 164, 100, 228, |
79 | | 20, 148, 84, 212, 52, 180, 116, 244, |
80 | | 12, 140, 76, 204, 44, 172, 108, 236, |
81 | | 28, 156, 92, 220, 60, 188, 124, 252, |
82 | | 2, 130, 66, 194, 34, 162, 98, 226, |
83 | | 18, 146, 82, 210, 50, 178, 114, 242, |
84 | | 10, 138, 74, 202, 42, 170, 106, 234, |
85 | | 26, 154, 90, 218, 58, 186, 122, 250, |
86 | | 6, 134, 70, 198, 38, 166, 102, 230, |
87 | | 22, 150, 86, 214, 54, 182, 118, 246, |
88 | | 14, 142, 78, 206, 46, 174, 110, 238, |
89 | | 30, 158, 94, 222, 62, 190, 126, 254, |
90 | | 1, 129, 65, 193, 33, 161, 97, 225, |
91 | | 17, 145, 81, 209, 49, 177, 113, 241, |
92 | | 9, 137, 73, 201, 41, 169, 105, 233, |
93 | | 25, 153, 89, 217, 57, 185, 121, 249, |
94 | | 5, 133, 69, 197, 37, 165, 101, 229, |
95 | | 21, 149, 85, 213, 53, 181, 117, 245, |
96 | | 13, 141, 77, 205, 45, 173, 109, 237, |
97 | | 29, 157, 93, 221, 61, 189, 125, 253, |
98 | | 3, 131, 67, 195, 35, 163, 99, 227, |
99 | | 19, 147, 83, 211, 51, 179, 115, 243, |
100 | | 11, 139, 75, 203, 43, 171, 107, 235, |
101 | | 27, 155, 91, 219, 59, 187, 123, 251, |
102 | | 7, 135, 71, 199, 39, 167, 103, 231, |
103 | | 23, 151, 87, 215, 55, 183, 119, 247, |
104 | | 15, 143, 79, 207, 47, 175, 111, 239, |
105 | | 31, 159, 95, 223, 63, 191, 127, 255 |
106 | | }; |
107 | | |
108 | | |
109 | | /* |
110 | | * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}' |
111 | | */ |
112 | | static uint8_t parent_byte[256] = { |
113 | | 0, 0, 0, 1, 0, 1, 2, 3, |
114 | | 0, 1, 2, 3, 4, 5, 6, 7, |
115 | | 0, 1, 2, 3, 4, 5, 6, 7, |
116 | | 8, 9, 10, 11, 12, 13, 14, 15, |
117 | | 0, 1, 2, 3, 4, 5, 6, 7, |
118 | | 8, 9, 10, 11, 12, 13, 14, 15, |
119 | | 16, 17, 18, 19, 20, 21, 22, 23, |
120 | | 24, 25, 26, 27, 28, 29, 30, 31, |
121 | | 0, 1, 2, 3, 4, 5, 6, 7, |
122 | | 8, 9, 10, 11, 12, 13, 14, 15, |
123 | | 16, 17, 18, 19, 20, 21, 22, 23, |
124 | | 24, 25, 26, 27, 28, 29, 30, 31, |
125 | | 32, 33, 34, 35, 36, 37, 38, 39, |
126 | | 40, 41, 42, 43, 44, 45, 46, 47, |
127 | | 48, 49, 50, 51, 52, 53, 54, 55, |
128 | | 56, 57, 58, 59, 60, 61, 62, 63, |
129 | | 0, 1, 2, 3, 4, 5, 6, 7, |
130 | | 8, 9, 10, 11, 12, 13, 14, 15, |
131 | | 16, 17, 18, 19, 20, 21, 22, 23, |
132 | | 24, 25, 26, 27, 28, 29, 30, 31, |
133 | | 32, 33, 34, 35, 36, 37, 38, 39, |
134 | | 40, 41, 42, 43, 44, 45, 46, 47, |
135 | | 48, 49, 50, 51, 52, 53, 54, 55, |
136 | | 56, 57, 58, 59, 60, 61, 62, 63, |
137 | | 64, 65, 66, 67, 68, 69, 70, 71, |
138 | | 72, 73, 74, 75, 76, 77, 78, 79, |
139 | | 80, 81, 82, 83, 84, 85, 86, 87, |
140 | | 88, 89, 90, 91, 92, 93, 94, 95, |
141 | | 96, 97, 98, 99, 100, 101, 102, 103, |
142 | | 104, 105, 106, 107, 108, 109, 110, 111, |
143 | | 112, 113, 114, 115, 116, 117, 118, 119, |
144 | | 120, 121, 122, 123, 124, 125, 126, 127 |
145 | | }; |
146 | | |
147 | | |
148 | | /* |
149 | | * Reverse a key. |
150 | | */ |
151 | | static uint32_t reverse(uint32_t key) |
152 | 536k | { |
153 | | /* |
154 | | * Cast to uint32_t is required because the |
155 | | * default type of of the expression is an |
156 | | * int and ubsan correctly complains that |
157 | | * the result of 0xff << 24 won't fit in a |
158 | | * signed 32bit integer. |
159 | | */ |
160 | 536k | return (((uint32_t)reversed_byte[key & 0xff] << 24) | |
161 | 536k | ((uint32_t)reversed_byte[(key >> 8) & 0xff] << 16) | |
162 | 536k | ((uint32_t)reversed_byte[(key >> 16) & 0xff] << 8) | |
163 | 536k | ((uint32_t)reversed_byte[(key >> 24) & 0xff])); |
164 | 536k | } |
165 | | |
166 | | /* |
167 | | * Take the parent by discarding the highest bit that is set. |
168 | | */ |
169 | | static uint32_t parent_of(uint32_t key) |
170 | 3.71M | { |
171 | 3.71M | if (key > 0x00ffffff) |
172 | 0 | return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24); |
173 | | |
174 | 3.71M | if (key > 0x0000ffff) |
175 | 0 | return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16); |
176 | | |
177 | 3.71M | if (key > 0x000000ff) |
178 | 3.86k | return (key & 0x000000ff) | (parent_byte[key >> 8] << 8); |
179 | | |
180 | 3.71M | return parent_byte[key]; |
181 | 3.71M | } |
182 | | |
183 | | |
184 | | static fr_hash_entry_t *list_find(fr_hash_table_t *ht, |
185 | | fr_hash_entry_t *head, uint32_t reversed, void const *data) |
186 | 414k | { |
187 | 414k | fr_hash_entry_t *cur; |
188 | | |
189 | 586k | for (cur = head; cur != &ht->null; cur = cur->next) { |
190 | 496k | if (cur->reversed == reversed) { |
191 | 297k | if (ht->cmp) { |
192 | 297k | int cmp = ht->cmp(data, cur->data); |
193 | 297k | if (cmp > 0) break; |
194 | 297k | if (cmp < 0) continue; |
195 | 297k | } |
196 | 297k | return cur; |
197 | 297k | } |
198 | 199k | if (cur->reversed > reversed) break; |
199 | 199k | } |
200 | | |
201 | 117k | return NULL; |
202 | 414k | } |
203 | | |
204 | | |
205 | | /* |
206 | | * Inserts a new entry into the list, in order. |
207 | | */ |
208 | | static bool list_insert(fr_hash_table_t *ht, |
209 | | fr_hash_entry_t **head, fr_hash_entry_t *node) |
210 | 121k | { |
211 | 121k | fr_hash_entry_t **last, *cur; |
212 | | |
213 | 121k | last = head; |
214 | | |
215 | 163k | for (cur = *head; cur != &ht->null; cur = cur->next) { |
216 | 74.0k | if (cur->reversed > node->reversed) break; |
217 | 43.2k | last = &(cur->next); |
218 | | |
219 | 43.2k | if (cur->reversed == node->reversed) { |
220 | 996 | if (ht->cmp) { |
221 | 996 | int8_t cmp = ht->cmp(node->data, cur->data); |
222 | 996 | if (cmp > 0) break; |
223 | 996 | if (cmp < 0) continue; |
224 | 996 | } |
225 | 996 | return false; |
226 | 996 | } |
227 | 43.2k | } |
228 | | |
229 | 120k | node->next = *last; |
230 | 120k | *last = node; |
231 | | |
232 | 120k | return true; |
233 | 121k | } |
234 | | |
235 | | |
236 | | /* |
237 | | * Delete an entry from the list. |
238 | | */ |
239 | | static void list_delete(fr_hash_table_t *ht, |
240 | | fr_hash_entry_t **head, fr_hash_entry_t *node) |
241 | 42 | { |
242 | 42 | fr_hash_entry_t **last, *cur; |
243 | | |
244 | 42 | last = head; |
245 | | |
246 | 42 | for (cur = *head; cur != &ht->null; cur = cur->next) { |
247 | 42 | if (cur == node) break; |
248 | 0 | last = &(cur->next); |
249 | 0 | } |
250 | | |
251 | 42 | *last = node->next; |
252 | 42 | } |
253 | | |
254 | | static int _fr_hash_table_free(fr_hash_table_t *ht) |
255 | 67.4k | { |
256 | 67.4k | uint32_t i; |
257 | 67.4k | fr_hash_entry_t *node, *next; |
258 | | |
259 | 67.4k | if (ht->free) { |
260 | 247k | for (i = 0; i < ht->num_buckets; i++) { |
261 | 243k | if (ht->buckets[i]) for (node = ht->buckets[i]; |
262 | 90.6k | node != &ht->null; |
263 | 53.3k | node = next) { |
264 | 37.3k | next = node->next; |
265 | 37.3k | if (!node->data) continue; /* dummy entry */ |
266 | | |
267 | 37.3k | ht->free(node->data); |
268 | 37.3k | } |
269 | 243k | } |
270 | 3.72k | } |
271 | | |
272 | 67.4k | return 0; |
273 | 67.4k | } |
274 | | |
275 | | /* |
276 | | * Create the table. |
277 | | * |
278 | | * Memory usage in bytes is (20/3) * number of entries. |
279 | | */ |
280 | | fr_hash_table_t *_fr_hash_table_alloc(TALLOC_CTX *ctx, |
281 | | char const *type, |
282 | | fr_hash_t hash_func, |
283 | | fr_cmp_t cmp_func, |
284 | | fr_free_t free_func) |
285 | 67.5k | { |
286 | 67.5k | fr_hash_table_t *ht; |
287 | | |
288 | 67.5k | ht = talloc(ctx, fr_hash_table_t); |
289 | 67.5k | if (!ht) return NULL; |
290 | 67.5k | talloc_set_destructor(ht, _fr_hash_table_free); |
291 | | |
292 | 67.5k | *ht = (fr_hash_table_t){ |
293 | 67.5k | .type = type, |
294 | 67.5k | .free = free_func, |
295 | 67.5k | .hash = hash_func, |
296 | 67.5k | .cmp = cmp_func, |
297 | 67.5k | .num_buckets = FR_HASH_NUM_BUCKETS, |
298 | 67.5k | .mask = FR_HASH_NUM_BUCKETS - 1, |
299 | | |
300 | | /* |
301 | | * Have a default load factor of 2.5. In practice this |
302 | | * means that the average load will hit 3 before the |
303 | | * table grows. |
304 | | */ |
305 | 67.5k | .next_grow = (FR_HASH_NUM_BUCKETS << 1) + (FR_HASH_NUM_BUCKETS >> 1), |
306 | 67.5k | .buckets = talloc_zero_array(ht, fr_hash_entry_t *, FR_HASH_NUM_BUCKETS) |
307 | 67.5k | }; |
308 | 67.5k | if (unlikely(!ht->buckets)) { |
309 | 0 | talloc_free(ht); |
310 | 0 | return NULL; |
311 | 0 | } |
312 | | |
313 | 67.5k | ht->null.reversed = ~0; |
314 | 67.5k | ht->null.key = ~0; |
315 | 67.5k | ht->null.next = &ht->null; |
316 | 67.5k | ht->buckets[0] = &ht->null; |
317 | | |
318 | 67.5k | return ht; |
319 | 67.5k | } |
320 | | |
321 | | |
322 | | /* |
323 | | * If the current bucket is uninitialized, initialize it |
324 | | * by recursively copying information from the parent. |
325 | | * |
326 | | * We may have a situation where entry E is a parent to 2 other |
327 | | * entries E' and E". If we split E into E and E', then the |
328 | | * nodes meant for E" end up in E or E', either of which is |
329 | | * wrong. To solve that problem, we walk down the whole chain, |
330 | | * inserting the elements into the correct place. |
331 | | */ |
332 | | static void fr_hash_table_fixup(fr_hash_table_t *ht, uint32_t entry) |
333 | 3.71M | { |
334 | 3.71M | uint32_t parent_entry; |
335 | 3.71M | fr_hash_entry_t **last, *cur; |
336 | 3.71M | uint32_t this; |
337 | | |
338 | 3.71M | parent_entry = parent_of(entry); |
339 | | |
340 | | /* parent_entry == entry if and only if entry == 0 */ |
341 | | |
342 | 3.71M | if (!ht->buckets[parent_entry]) { |
343 | 1.82M | fr_hash_table_fixup(ht, parent_entry); |
344 | 1.82M | } |
345 | | |
346 | | /* |
347 | | * Keep walking down cur, trying to find entries that |
348 | | * don't belong here any more. There may be multiple |
349 | | * ones, so we can't have a naive algorithm... |
350 | | */ |
351 | 3.71M | last = &ht->buckets[parent_entry]; |
352 | 3.71M | this = parent_entry; |
353 | | |
354 | 3.76M | for (cur = *last; cur != &ht->null; cur = cur->next) { |
355 | 48.4k | uint32_t real_entry; |
356 | | |
357 | 48.4k | real_entry = cur->key & ht->mask; |
358 | 48.4k | if (real_entry != this) { /* ht->buckets[real_entry] == NULL */ |
359 | 11.7k | *last = &ht->null; |
360 | 11.7k | ht->buckets[real_entry] = cur; |
361 | 11.7k | this = real_entry; |
362 | 11.7k | } |
363 | | |
364 | 48.4k | last = &(cur->next); |
365 | 48.4k | } |
366 | | |
367 | | /* |
368 | | * We may NOT have initialized this bucket, so do it now. |
369 | | */ |
370 | 3.71M | if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null; |
371 | 3.71M | } |
372 | | |
373 | | /* |
374 | | * This should be a power of two. Changing it to 4 doesn't seem |
375 | | * to make any difference. |
376 | | */ |
377 | 368 | #define GROW_FACTOR (2) |
378 | | |
379 | | /* |
380 | | * Grow the hash table. |
381 | | */ |
382 | | static void fr_hash_table_grow(fr_hash_table_t *ht) |
383 | 184 | { |
384 | 184 | fr_hash_entry_t **buckets; |
385 | 184 | size_t existing = talloc_get_size(ht->buckets); |
386 | | |
387 | 184 | buckets = talloc_realloc(ht, ht->buckets, fr_hash_entry_t *, GROW_FACTOR * ht->num_buckets); |
388 | 184 | if (!buckets) return; |
389 | | |
390 | 184 | memset(((uint8_t *)buckets) + existing, 0, talloc_get_size(buckets) - existing); |
391 | | |
392 | 184 | ht->buckets = buckets; |
393 | 184 | ht->num_buckets *= GROW_FACTOR; |
394 | 184 | ht->next_grow *= GROW_FACTOR; |
395 | 184 | ht->mask = ht->num_buckets - 1; |
396 | | #ifdef TESTING |
397 | | grow = 1; |
398 | | fprintf(stderr, "GROW TO %d\n", ht->num_buckets); |
399 | | #endif |
400 | 184 | } |
401 | | |
402 | | /* |
403 | | * Internal find a node routine. |
404 | | */ |
405 | | static inline CC_HINT(always_inline) fr_hash_entry_t *hash_table_find(fr_hash_table_t *ht, |
406 | | uint32_t key, void const *data) |
407 | 414k | { |
408 | 414k | uint32_t entry; |
409 | 414k | uint32_t reversed; |
410 | | |
411 | 414k | entry = key & ht->mask; |
412 | 414k | reversed = reverse(key); |
413 | | |
414 | 414k | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
415 | | |
416 | 414k | return list_find(ht, ht->buckets[entry], reversed, data); |
417 | 414k | } |
418 | | |
419 | | /** Find data in a hash table |
420 | | * |
421 | | * @param[in] ht to find data in. |
422 | | * @param[in] data to find. Will be passed to the |
423 | | * hashing function. |
424 | | * @return |
425 | | * - The user data we found. |
426 | | * - NULL if we couldn't find any matching data. |
427 | | */ |
428 | | void *fr_hash_table_find(fr_hash_table_t *ht, void const *data) |
429 | 378k | { |
430 | 378k | fr_hash_entry_t *node; |
431 | | |
432 | 378k | node = hash_table_find(ht, ht->hash(data), data); |
433 | 378k | if (!node) return NULL; |
434 | | |
435 | 297k | return UNCONST(void *, node->data); |
436 | 378k | } |
437 | | |
438 | | /** Hash table lookup with pre-computed key |
439 | | * |
440 | | * @param[in] ht to find data in. |
441 | | * @param[in] key the precomputed key. |
442 | | * @param[in] data for list matching. |
443 | | * @return |
444 | | * - The user data we found. |
445 | | * - NULL if we couldn't find any matching data. |
446 | | */ |
447 | | void *fr_hash_table_find_by_key(fr_hash_table_t *ht, uint32_t key, void const *data) |
448 | 0 | { |
449 | 0 | fr_hash_entry_t *node; |
450 | |
|
451 | 0 | node = hash_table_find(ht, key, data); |
452 | 0 | if (!node) return NULL; |
453 | | |
454 | 0 | return UNCONST(void *, node->data); |
455 | 0 | } |
456 | | |
457 | | /** Insert data into a hash table |
458 | | * |
459 | | * @param[in] ht to insert data into. |
460 | | * @param[in] data to insert. Will be passed to the |
461 | | * hashing function. |
462 | | * @return |
463 | | * - true if data was inserted. |
464 | | * - false if data already existed and was not inserted. |
465 | | */ |
466 | | bool fr_hash_table_insert(fr_hash_table_t *ht, void const *data) |
467 | 121k | { |
468 | 121k | uint32_t key; |
469 | 121k | uint32_t entry; |
470 | 121k | uint32_t reversed; |
471 | 121k | fr_hash_entry_t *node; |
472 | | |
473 | 121k | #ifndef TALLOC_GET_TYPE_ABORT_NOOP |
474 | 121k | if (ht->type) (void)_talloc_get_type_abort(data, ht->type, __location__); |
475 | 121k | #endif |
476 | | |
477 | 121k | key = ht->hash(data); |
478 | 121k | entry = key & ht->mask; |
479 | 121k | reversed = reverse(key); |
480 | | |
481 | 121k | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
482 | | |
483 | | /* |
484 | | * If we try to do our own memory allocation here, the |
485 | | * speedup is only ~15% or so, which isn't worth it. |
486 | | */ |
487 | 121k | node = talloc_zero(ht, fr_hash_entry_t); |
488 | 121k | if (unlikely(!node)) return false; |
489 | | |
490 | 121k | node->next = &ht->null; |
491 | 121k | node->reversed = reversed; |
492 | 121k | node->key = key; |
493 | 121k | node->data = UNCONST(void *, data); |
494 | | |
495 | | /* already in the table, can't insert it */ |
496 | 121k | if (!list_insert(ht, &ht->buckets[entry], node)) { |
497 | 996 | talloc_free(node); |
498 | 996 | return false; |
499 | 996 | } |
500 | | |
501 | | /* |
502 | | * Check the load factor, and grow the table if |
503 | | * necessary. |
504 | | */ |
505 | 120k | ht->num_elements++; |
506 | 120k | if (ht->num_elements >= ht->next_grow) fr_hash_table_grow(ht); |
507 | | |
508 | 120k | return true; |
509 | 121k | } |
510 | | |
511 | | /** Replace old data with new data, OR insert if there is no old |
512 | | * |
513 | | * @param[out] old data that was replaced. If this argument |
514 | | * is not NULL, then the old data will not |
515 | | * be freed, even if a free function is |
516 | | * configured. |
517 | | * @param[in] ht to insert data into. |
518 | | * @param[in] data to replace. Will be passed to the |
519 | | * hashing function. |
520 | | * @return |
521 | | * - 1 if data was replaced. |
522 | | * - 0 if data was inserted. |
523 | | * - -1 if we failed to replace data |
524 | | */ |
525 | | int fr_hash_table_replace(void **old, fr_hash_table_t *ht, void const *data) |
526 | 35.8k | { |
527 | 35.8k | fr_hash_entry_t *node; |
528 | | |
529 | 35.8k | node = hash_table_find(ht, ht->hash(data), data); |
530 | 35.8k | if (!node) { |
531 | 35.4k | if (old) *old = NULL; |
532 | 35.4k | return fr_hash_table_insert(ht, data) ? 1 : -1; |
533 | 35.4k | } |
534 | | |
535 | 321 | if (old) { |
536 | 0 | *old = node->data; |
537 | 321 | } else if (ht->free) { |
538 | 0 | ht->free(node->data); |
539 | 0 | } |
540 | | |
541 | 321 | node->data = UNCONST(void *, data); |
542 | | |
543 | 321 | return 0; |
544 | 35.8k | } |
545 | | |
546 | | /** Remove an entry from the hash table, without freeing the data |
547 | | * |
548 | | * @param[in] ht to remove data from. |
549 | | * @param[in] data to remove. Will be passed to the |
550 | | * hashing function. |
551 | | * @return |
552 | | * - The user data we removed. |
553 | | * - NULL if we couldn't find any matching data. |
554 | | */ |
555 | | void *fr_hash_table_remove(fr_hash_table_t *ht, void const *data) |
556 | 42 | { |
557 | 42 | uint32_t key; |
558 | 42 | uint32_t entry; |
559 | 42 | uint32_t reversed; |
560 | 42 | void *old; |
561 | 42 | fr_hash_entry_t *node; |
562 | | |
563 | 42 | key = ht->hash(data); |
564 | 42 | entry = key & ht->mask; |
565 | 42 | reversed = reverse(key); |
566 | | |
567 | 42 | if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry); |
568 | | |
569 | 42 | node = list_find(ht, ht->buckets[entry], reversed, data); |
570 | 42 | if (!node) return NULL; |
571 | | |
572 | 42 | list_delete(ht, &ht->buckets[entry], node); |
573 | 42 | ht->num_elements--; |
574 | | |
575 | 42 | old = node->data; |
576 | 42 | talloc_free(node); |
577 | | |
578 | 42 | return old; |
579 | 42 | } |
580 | | |
581 | | /** Remove and free data (if a free function was specified) |
582 | | * |
583 | | * @param[in] ht to remove data from. |
584 | | * @param[in] data to remove/free. |
585 | | * @return |
586 | | * - true if we removed data. |
587 | | * - false if we couldn't find any matching data. |
588 | | */ |
589 | | bool fr_hash_table_delete(fr_hash_table_t *ht, void const *data) |
590 | 42 | { |
591 | 42 | void *old; |
592 | | |
593 | 42 | old = fr_hash_table_remove(ht, data); |
594 | 42 | if (!old) return false; |
595 | | |
596 | 42 | if (ht->free) ht->free(old); |
597 | | |
598 | 42 | return true; |
599 | 42 | } |
600 | | |
601 | | /* |
602 | | * Count number of elements |
603 | | */ |
604 | | uint32_t fr_hash_table_num_elements(fr_hash_table_t *ht) |
605 | 21 | { |
606 | 21 | return ht->num_elements; |
607 | 21 | } |
608 | | |
609 | | /** Iterate over entries in a hash table |
610 | | * |
611 | | * @note If the hash table is modified the iterator should be considered invalidated. |
612 | | * |
613 | | * @param[in] ht to iterate over. |
614 | | * @param[in] iter Pointer to an iterator struct, used to maintain |
615 | | * state between calls. |
616 | | * @return |
617 | | * - User data. |
618 | | * - NULL if at the end of the list. |
619 | | */ |
620 | | void *fr_hash_table_iter_next(fr_hash_table_t *ht, fr_hash_iter_t *iter) |
621 | 98.9M | { |
622 | 98.9M | fr_hash_entry_t *node; |
623 | 98.9M | uint32_t i; |
624 | | |
625 | | /* |
626 | | * Return the next element in the bucket |
627 | | */ |
628 | 98.9M | if (iter->node != &ht->null) { |
629 | 40.9M | node = iter->node; |
630 | 40.9M | iter->node = node->next; |
631 | | |
632 | 40.9M | return node->data; |
633 | 40.9M | } |
634 | | |
635 | 58.0M | if (iter->bucket == 0) return NULL; |
636 | | |
637 | | /* |
638 | | * We might have to go through multiple empty |
639 | | * buckets to find one that contains something |
640 | | * we should return |
641 | | */ |
642 | 57.6M | i = iter->bucket - 1; |
643 | 193M | for (;;) { |
644 | 193M | if (!ht->buckets[i]) fr_hash_table_fixup(ht, i); |
645 | | |
646 | 193M | node = ht->buckets[i]; |
647 | 193M | if (node == &ht->null) { |
648 | 138M | if (i == 0) break; |
649 | 135M | i--; |
650 | 135M | continue; /* This bucket was empty too... */ |
651 | 138M | } |
652 | | |
653 | 55.3M | iter->node = node->next; /* Store the next one to examine */ |
654 | 55.3M | iter->bucket = i; |
655 | 55.3M | return node->data; |
656 | 193M | } |
657 | 2.23M | iter->bucket = i; |
658 | | |
659 | 2.23M | return NULL; |
660 | 57.6M | } |
661 | | |
662 | | /** Initialise an iterator |
663 | | * |
664 | | * @note If the hash table is modified the iterator should be considered invalidated. |
665 | | * |
666 | | * @param[in] ht to iterate over. |
667 | | * @param[out] iter to initialise. |
668 | | * @return |
669 | | * - The first entry in the hash table. |
670 | | * - NULL if the hash table is empty. |
671 | | */ |
672 | | void *fr_hash_table_iter_init(fr_hash_table_t *ht, fr_hash_iter_t *iter) |
673 | 2.63M | { |
674 | 2.63M | iter->bucket = ht->num_buckets; |
675 | 2.63M | iter->node = &ht->null; |
676 | | |
677 | 2.63M | return fr_hash_table_iter_next(ht, iter); |
678 | 2.63M | } |
679 | | |
680 | | /** Copy all entries out of a hash table into an array |
681 | | * |
682 | | * @param[in] ctx to allocate array in. |
683 | | * @param[in] out array of hash table entries. |
684 | | * @param[in] ht to flatter. |
685 | | * @return |
686 | | * - 0 on success. |
687 | | * - -1 on failure. |
688 | | */ |
689 | | int fr_hash_table_flatten(TALLOC_CTX *ctx, void **out[], fr_hash_table_t *ht) |
690 | 21 | { |
691 | 21 | uint64_t num = fr_hash_table_num_elements(ht), i; |
692 | 21 | fr_hash_iter_t iter; |
693 | 21 | void *item, **list; |
694 | | |
695 | 21 | if (unlikely(!(list = talloc_array(ctx, void *, num)))) return -1; |
696 | | |
697 | 21 | for (item = fr_hash_table_iter_init(ht, &iter), i = 0; |
698 | 29 | item; |
699 | 21 | item = fr_hash_table_iter_next(ht, &iter), i++) list[i] = item; |
700 | | |
701 | 21 | *out = list; |
702 | | |
703 | 21 | return 0; |
704 | 21 | } |
705 | | |
706 | | /** Ensure all buckets are filled |
707 | | * |
708 | | * This must be called if the table will be read by multiple threads without |
709 | | * synchronisation. Synchronisation is still required for updates. |
710 | | * |
711 | | * @param[in] ht to fill. |
712 | | */ |
713 | | void fr_hash_table_fill(fr_hash_table_t *ht) |
714 | 0 | { |
715 | 0 | int i; |
716 | |
|
717 | 0 | for (i = ht->num_buckets - 1; i >= 0; i--) if (!ht->buckets[i]) fr_hash_table_fixup(ht, i); |
718 | 0 | } |
719 | | |
720 | | #ifdef TESTING |
721 | | /* |
722 | | * Show what the hash table is doing. |
723 | | */ |
724 | | int fr_hash_table_info(fr_hash_table_t *ht) |
725 | | { |
726 | | int i, a, collisions, uninitialized; |
727 | | int array[256]; |
728 | | |
729 | | if (!ht) return 0; |
730 | | |
731 | | uninitialized = collisions = 0; |
732 | | memset(array, 0, sizeof(array)); |
733 | | |
734 | | for (i = 0; i < ht->num_buckets; i++) { |
735 | | uint32_t key; |
736 | | int load; |
737 | | fr_hash_entry_t *node, *next; |
738 | | |
739 | | /* |
740 | | * If we haven't inserted or looked up an entry |
741 | | * in a bucket, it's uninitialized. |
742 | | */ |
743 | | if (!ht->buckets[i]) { |
744 | | uninitialized++; |
745 | | continue; |
746 | | } |
747 | | |
748 | | load = 0; |
749 | | key = ~0; |
750 | | for (node = ht->buckets[i]; node != &ht->null; node = next) { |
751 | | if (node->reversed == key) { |
752 | | collisions++; |
753 | | } else { |
754 | | key = node->reversed; |
755 | | } |
756 | | next = node->next; |
757 | | load++; |
758 | | } |
759 | | |
760 | | if (load > 255) load = 255; |
761 | | array[load]++; |
762 | | } |
763 | | |
764 | | printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht, |
765 | | ht->num_buckets, uninitialized); |
766 | | printf("\tnum entries %d\thash collisions %d\n", |
767 | | ht->num_elements, collisions); |
768 | | |
769 | | a = 0; |
770 | | for (i = 1; i < 256; i++) { |
771 | | if (!array[i]) continue; |
772 | | printf("%d\t%d\n", i, array[i]); |
773 | | |
774 | | /* |
775 | | * Since the entries are ordered, the lookup cost |
776 | | * for any one element in a chain is (on average) |
777 | | * the cost of walking half of the chain. |
778 | | */ |
779 | | if (i > 1) { |
780 | | a += array[i] * i; |
781 | | } |
782 | | } |
783 | | a /= 2; |
784 | | a += array[1]; |
785 | | |
786 | | printf("\texpected lookup cost = %d/%d or %f\n\n", |
787 | | ht->num_elements, a, |
788 | | (float) ht->num_elements / (float) a); |
789 | | |
790 | | return 0; |
791 | | } |
792 | | #endif |
793 | | |
794 | | |
795 | 96.5k | #define FNV_MAGIC_INIT (0x811c9dc5) |
796 | 342k | #define FNV_MAGIC_PRIME (0x01000193) |
797 | | |
798 | | /* |
799 | | * A fast hash function. For details, see: |
800 | | * |
801 | | * http://www.isthe.com/chongo/tech/comp/fnv/ |
802 | | * |
803 | | * Which also includes public domain source. We've re-written |
804 | | * it here for our purposes. |
805 | | */ |
806 | | uint32_t fr_hash(void const *data, size_t size) |
807 | 96.5k | { |
808 | 96.5k | uint8_t const *p = data; |
809 | 96.5k | uint8_t const *q = p + size; |
810 | 96.5k | uint32_t hash = FNV_MAGIC_INIT; |
811 | | |
812 | | /* |
813 | | * FNV-1 hash each octet in the buffer |
814 | | */ |
815 | 439k | while (p != q) { |
816 | | /* |
817 | | * XOR the 8-bit quantity into the bottom of |
818 | | * the hash. |
819 | | */ |
820 | 342k | hash ^= (uint32_t) (*p++); |
821 | | |
822 | | /* |
823 | | * Multiple by 32-bit magic FNV prime, mod 2^32 |
824 | | */ |
825 | 342k | hash *= FNV_MAGIC_PRIME; |
826 | | #if 0 |
827 | | /* |
828 | | * Potential optimization. |
829 | | */ |
830 | | hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24); |
831 | | #endif |
832 | 342k | } |
833 | | |
834 | 96.5k | return hash; |
835 | 96.5k | } |
836 | | |
837 | | /* |
838 | | * Continue hashing data. |
839 | | */ |
840 | | uint32_t fr_hash_update(void const *data, size_t size, uint32_t hash) |
841 | 0 | { |
842 | 0 | uint8_t const *p = data; |
843 | 0 | uint8_t const *q; |
844 | |
|
845 | 0 | if (size == 0) return hash; /* Avoid ubsan issues with access NULL pointer */ |
846 | | |
847 | 0 | q = p + size; |
848 | 0 | while (p < q) { |
849 | 0 | hash *= FNV_MAGIC_PRIME; |
850 | 0 | hash ^= (uint32_t) (*p++); |
851 | 0 | } |
852 | |
|
853 | 0 | return hash; |
854 | 0 | } |
855 | | |
856 | | /* |
857 | | * Hash a C string, so we loop over it once. |
858 | | */ |
859 | | uint32_t fr_hash_string(char const *p) |
860 | 0 | { |
861 | 0 | uint32_t hash = FNV_MAGIC_INIT; |
862 | |
|
863 | 0 | while (*p) { |
864 | 0 | hash *= FNV_MAGIC_PRIME; |
865 | 0 | hash ^= (uint32_t) (*p++); |
866 | 0 | } |
867 | |
|
868 | 0 | return hash; |
869 | 0 | } |
870 | | |
871 | | /** Hash a C string, converting all chars to lowercase |
872 | | * |
873 | | */ |
874 | | uint32_t fr_hash_case_string(char const *p) |
875 | 0 | { |
876 | 0 | uint32_t hash = FNV_MAGIC_INIT; |
877 | |
|
878 | 0 | while (*p) { |
879 | 0 | hash *= FNV_MAGIC_PRIME; |
880 | 0 | hash ^= (uint32_t) (tolower((uint8_t) *p++)); |
881 | 0 | } |
882 | |
|
883 | 0 | return hash; |
884 | 0 | } |
885 | | |
886 | | /** Check hash table is sane |
887 | | * |
888 | | */ |
889 | | void fr_hash_table_verify(fr_hash_table_t *ht) |
890 | 2.63M | { |
891 | 2.63M | fr_hash_iter_t iter; |
892 | 2.63M | void *ptr; |
893 | | |
894 | 2.63M | (void)talloc_get_type_abort(ht, fr_hash_table_t); |
895 | 2.63M | (void)talloc_get_type_abort(ht->buckets, fr_hash_entry_t *); |
896 | | |
897 | 2.63M | fr_assert(talloc_array_length(ht->buckets) == ht->num_buckets); |
898 | | |
899 | | /* |
900 | | * Check talloc headers on all data |
901 | | */ |
902 | 2.63M | if (ht->type) { |
903 | 2.63M | for (ptr = fr_hash_table_iter_init(ht, &iter); |
904 | 98.9M | ptr; |
905 | 96.3M | ptr = fr_hash_table_iter_next(ht, &iter)) { |
906 | 96.3M | (void)_talloc_get_type_abort(ptr, ht->type, __location__); |
907 | 96.3M | } |
908 | 2.63M | } |
909 | 2.63M | } |
910 | | |
911 | | #ifdef TESTING |
912 | | /* |
913 | | * cc -g -DTESTING -I ../include hash.c -o hash |
914 | | * |
915 | | * ./hash |
916 | | */ |
917 | | static uint32_t hash_int(void const *data) |
918 | | { |
919 | | return fr_hash((int *) data, sizeof(int)); |
920 | | } |
921 | | |
922 | | #define MAX 1024*1024 |
923 | | int main(int argc, char **argv) |
924 | | { |
925 | | int i, *p, *q, k; |
926 | | fr_hash_table_t *ht; |
927 | | int *array; |
928 | | |
929 | | ht = fr_hash_table_alloc(NULL, hash_int, NULL, NULL); |
930 | | if (!ht) { |
931 | | fprintf(stderr, "Hash create failed\n"); |
932 | | fr_exit(1); |
933 | | } |
934 | | |
935 | | array = talloc_zero_array(NULL, int, MAX); |
936 | | if (!array) fr_exit(1); |
937 | | |
938 | | for (i = 0; i < MAX; i++) { |
939 | | p = array + i; |
940 | | *p = i; |
941 | | |
942 | | if (!fr_hash_table_insert(ht, p)) { |
943 | | fprintf(stderr, "Failed insert %08x\n", i); |
944 | | fr_exit(1); |
945 | | } |
946 | | #ifdef TEST_INSERT |
947 | | q = fr_hash_table_find(ht, p); |
948 | | if (q != p) { |
949 | | fprintf(stderr, "Bad data %d\n", i); |
950 | | fr_exit(1); |
951 | | } |
952 | | #endif |
953 | | } |
954 | | |
955 | | fr_hash_table_info(ht); |
956 | | |
957 | | /* |
958 | | * Build this to see how lookups result in shortening |
959 | | * of the hash chains. |
960 | | */ |
961 | | if (1) { |
962 | | for (i = 0; i < MAX ; i++) { |
963 | | q = fr_hash_table_find(ht, &i); |
964 | | if (!q || *q != i) { |
965 | | fprintf(stderr, "Failed finding %d\n", i); |
966 | | fr_exit(1); |
967 | | } |
968 | | |
969 | | #if 0 |
970 | | if (!fr_hash_table_delete(ht, &i)) { |
971 | | fprintf(stderr, "Failed deleting %d\n", i); |
972 | | fr_exit(1); |
973 | | } |
974 | | q = fr_hash_table_find(ht, &i); |
975 | | if (q) { |
976 | | fprintf(stderr, "Failed to delete %08x\n", i); |
977 | | fr_exit(1); |
978 | | } |
979 | | #endif |
980 | | } |
981 | | |
982 | | fr_hash_table_info(ht); |
983 | | } |
984 | | |
985 | | fr_hash_table_free(ht); |
986 | | talloc_free(array); |
987 | | |
988 | | return EXIT_SUCCESS; |
989 | | } |
990 | | #endif |