/src/binutils-gdb/libiberty/hashtab.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* An expandable hash tables datatype. |
2 | | Copyright (C) 1999-2025 Free Software Foundation, Inc. |
3 | | Contributed by Vladimir Makarov (vmakarov@cygnus.com). |
4 | | |
5 | | This file is part of the libiberty library. |
6 | | Libiberty is free software; you can redistribute it and/or |
7 | | modify it under the terms of the GNU Library General Public |
8 | | License as published by the Free Software Foundation; either |
9 | | version 2 of the License, or (at your option) any later version. |
10 | | |
11 | | Libiberty is distributed in the hope that it will be useful, |
12 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | Library General Public License for more details. |
15 | | |
16 | | You should have received a copy of the GNU Library General Public |
17 | | License along with libiberty; see the file COPYING.LIB. If |
18 | | not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, |
19 | | Boston, MA 02110-1301, USA. */ |
20 | | |
21 | | /* This package implements basic hash table functionality. It is possible |
22 | | to search for an entry, create an entry and destroy an entry. |
23 | | |
24 | | Elements in the table are generic pointers. |
25 | | |
26 | | The size of the table is not fixed; if the occupancy of the table |
27 | | grows too high the hash table will be expanded. |
28 | | |
29 | | The abstract data implementation is based on generalized Algorithm D |
30 | | from Knuth's book "The art of computer programming". Hash table is |
31 | | expanded by creation of new hash table and transferring elements from |
32 | | the old table to the new table. */ |
33 | | |
34 | | #ifdef HAVE_CONFIG_H |
35 | | #include "config.h" |
36 | | #endif |
37 | | |
38 | | #include <sys/types.h> |
39 | | |
40 | | #ifdef HAVE_STDLIB_H |
41 | | #include <stdlib.h> |
42 | | #endif |
43 | | #ifdef HAVE_STRING_H |
44 | | #include <string.h> |
45 | | #endif |
46 | | #ifdef HAVE_MALLOC_H |
47 | | #include <malloc.h> |
48 | | #endif |
49 | | #ifdef HAVE_LIMITS_H |
50 | | #include <limits.h> |
51 | | #endif |
52 | | #ifdef HAVE_INTTYPES_H |
53 | | #include <inttypes.h> |
54 | | #endif |
55 | | #ifdef HAVE_STDINT_H |
56 | | #include <stdint.h> |
57 | | #endif |
58 | | |
59 | | #include <stdio.h> |
60 | | |
61 | | #include "libiberty.h" |
62 | | #include "ansidecl.h" |
63 | | #include "hashtab.h" |
64 | | |
65 | | #ifndef CHAR_BIT |
66 | | #define CHAR_BIT 8 |
67 | | #endif |
68 | | |
69 | | static unsigned int higher_prime_index (unsigned long); |
70 | | static hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int); |
71 | | static hashval_t htab_mod (hashval_t, htab_t); |
72 | | static hashval_t htab_mod_m2 (hashval_t, htab_t); |
73 | | static hashval_t hash_pointer (const void *); |
74 | | static int eq_pointer (const void *, const void *); |
75 | | static int htab_expand (htab_t); |
76 | | static void **find_empty_slot_for_expand (htab_t, hashval_t); |
77 | | |
78 | | /* At some point, we could make these be NULL, and modify the |
79 | | hash-table routines to handle NULL specially; that would avoid |
80 | | function-call overhead for the common case of hashing pointers. */ |
81 | | htab_hash htab_hash_pointer = hash_pointer; |
82 | | htab_eq htab_eq_pointer = eq_pointer; |
83 | | |
84 | | /* Table of primes and multiplicative inverses. |
85 | | |
86 | | Note that these are not minimally reduced inverses. Unlike when generating |
87 | | code to divide by a constant, we want to be able to use the same algorithm |
88 | | all the time. All of these inverses (are implied to) have bit 32 set. |
89 | | |
90 | | For the record, here's the function that computed the table; it's a |
91 | | vastly simplified version of the function of the same name from gcc. */ |
92 | | |
93 | | #if 0 |
94 | | unsigned int |
95 | | ceil_log2 (unsigned int x) |
96 | | { |
97 | | int i; |
98 | | for (i = 31; i >= 0 ; --i) |
99 | | if (x > (1u << i)) |
100 | | return i+1; |
101 | | abort (); |
102 | | } |
103 | | |
104 | | unsigned int |
105 | | choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp) |
106 | | { |
107 | | unsigned long long mhigh; |
108 | | double nx; |
109 | | int lgup, post_shift; |
110 | | int pow, pow2; |
111 | | int n = 32, precision = 32; |
112 | | |
113 | | lgup = ceil_log2 (d); |
114 | | pow = n + lgup; |
115 | | pow2 = n + lgup - precision; |
116 | | |
117 | | nx = ldexp (1.0, pow) + ldexp (1.0, pow2); |
118 | | mhigh = nx / d; |
119 | | |
120 | | *shiftp = lgup - 1; |
121 | | *mlp = mhigh; |
122 | | return mhigh >> 32; |
123 | | } |
124 | | #endif |
125 | | |
126 | | struct prime_ent |
127 | | { |
128 | | hashval_t prime; |
129 | | hashval_t inv; |
130 | | hashval_t inv_m2; /* inverse of prime-2 */ |
131 | | hashval_t shift; |
132 | | }; |
133 | | |
134 | | static struct prime_ent const prime_tab[] = { |
135 | | { 7, 0x24924925, 0x9999999b, 2 }, |
136 | | { 13, 0x3b13b13c, 0x745d1747, 3 }, |
137 | | { 31, 0x08421085, 0x1a7b9612, 4 }, |
138 | | { 61, 0x0c9714fc, 0x15b1e5f8, 5 }, |
139 | | { 127, 0x02040811, 0x0624dd30, 6 }, |
140 | | { 251, 0x05197f7e, 0x073260a5, 7 }, |
141 | | { 509, 0x01824366, 0x02864fc8, 8 }, |
142 | | { 1021, 0x00c0906d, 0x014191f7, 9 }, |
143 | | { 2039, 0x0121456f, 0x0161e69e, 10 }, |
144 | | { 4093, 0x00300902, 0x00501908, 11 }, |
145 | | { 8191, 0x00080041, 0x00180241, 12 }, |
146 | | { 16381, 0x000c0091, 0x00140191, 13 }, |
147 | | { 32749, 0x002605a5, 0x002a06e6, 14 }, |
148 | | { 65521, 0x000f00e2, 0x00110122, 15 }, |
149 | | { 131071, 0x00008001, 0x00018003, 16 }, |
150 | | { 262139, 0x00014002, 0x0001c004, 17 }, |
151 | | { 524287, 0x00002001, 0x00006001, 18 }, |
152 | | { 1048573, 0x00003001, 0x00005001, 19 }, |
153 | | { 2097143, 0x00004801, 0x00005801, 20 }, |
154 | | { 4194301, 0x00000c01, 0x00001401, 21 }, |
155 | | { 8388593, 0x00001e01, 0x00002201, 22 }, |
156 | | { 16777213, 0x00000301, 0x00000501, 23 }, |
157 | | { 33554393, 0x00001381, 0x00001481, 24 }, |
158 | | { 67108859, 0x00000141, 0x000001c1, 25 }, |
159 | | { 134217689, 0x000004e1, 0x00000521, 26 }, |
160 | | { 268435399, 0x00000391, 0x000003b1, 27 }, |
161 | | { 536870909, 0x00000019, 0x00000029, 28 }, |
162 | | { 1073741789, 0x0000008d, 0x00000095, 29 }, |
163 | | { 2147483647, 0x00000003, 0x00000007, 30 }, |
164 | | /* Avoid "decimal constant so large it is unsigned" for 4294967291. */ |
165 | | { 0xfffffffb, 0x00000006, 0x00000008, 31 } |
166 | | }; |
167 | | |
168 | | /* The following function returns an index into the above table of the |
169 | | nearest prime number which is greater than N, and near a power of two. */ |
170 | | |
171 | | static unsigned int |
172 | | higher_prime_index (unsigned long n) |
173 | 1.34M | { |
174 | 1.34M | unsigned int low = 0; |
175 | 1.34M | unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]); |
176 | | |
177 | 8.07M | while (low != high) |
178 | 6.72M | { |
179 | 6.72M | unsigned int mid = low + (high - low) / 2; |
180 | 6.72M | if (n > prime_tab[mid].prime) |
181 | 1.41M | low = mid + 1; |
182 | 5.30M | else |
183 | 5.30M | high = mid; |
184 | 6.72M | } |
185 | | |
186 | | /* If we've run out of primes, abort. */ |
187 | 1.34M | if (n > prime_tab[low].prime) |
188 | 0 | { |
189 | 0 | fprintf (stderr, "Cannot find prime bigger than %lu\n", n); |
190 | 0 | abort (); |
191 | 0 | } |
192 | | |
193 | 1.34M | return low; |
194 | 1.34M | } |
195 | | |
196 | | /* Returns non-zero if P1 and P2 are equal. */ |
197 | | |
198 | | static int |
199 | | eq_pointer (const void *p1, const void *p2) |
200 | 0 | { |
201 | 0 | return p1 == p2; |
202 | 0 | } |
203 | | |
204 | | |
205 | | /* The parens around the function names in the next two definitions |
206 | | are essential in order to prevent macro expansions of the name. |
207 | | The bodies, however, are expanded as expected, so they are not |
208 | | recursive definitions. */ |
209 | | |
210 | | /* Return the current size of given hash table. */ |
211 | | |
212 | 43.7M | #define htab_size(htab) ((htab)->size) |
213 | | |
214 | | size_t |
215 | | (htab_size) (htab_t htab) |
216 | 0 | { |
217 | 0 | return htab_size (htab); |
218 | 0 | } |
219 | | |
220 | | /* Return the current number of elements in given hash table. */ |
221 | | |
222 | 10.2M | #define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted) |
223 | | |
224 | | size_t |
225 | | (htab_elements) (htab_t htab) |
226 | 10.0M | { |
227 | 10.0M | return htab_elements (htab); |
228 | 10.0M | } |
229 | | |
230 | | /* Return X % Y. */ |
231 | | |
232 | | static inline hashval_t |
233 | | htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift) |
234 | 50.7M | { |
235 | | /* The multiplicative inverses computed above are for 32-bit types, and |
236 | | requires that we be able to compute a highpart multiply. */ |
237 | 50.7M | #ifdef UNSIGNED_64BIT_TYPE |
238 | 50.7M | __extension__ typedef UNSIGNED_64BIT_TYPE ull; |
239 | 50.7M | if (sizeof (hashval_t) * CHAR_BIT <= 32) |
240 | 50.7M | { |
241 | 50.7M | hashval_t t1, t2, t3, t4, q, r; |
242 | | |
243 | 50.7M | t1 = ((ull)x * inv) >> 32; |
244 | 50.7M | t2 = x - t1; |
245 | 50.7M | t3 = t2 >> 1; |
246 | 50.7M | t4 = t1 + t3; |
247 | 50.7M | q = t4 >> shift; |
248 | 50.7M | r = x - (q * y); |
249 | | |
250 | 50.7M | return r; |
251 | 50.7M | } |
252 | 0 | #endif |
253 | | |
254 | | /* Otherwise just use the native division routines. */ |
255 | 0 | return x % y; |
256 | 50.7M | } |
257 | | |
258 | | /* Compute the primary hash for HASH given HTAB's current size. */ |
259 | | |
260 | | static inline hashval_t |
261 | | htab_mod (hashval_t hash, htab_t htab) |
262 | 42.4M | { |
263 | 42.4M | const struct prime_ent *p = &prime_tab[htab->size_prime_index]; |
264 | 42.4M | return htab_mod_1 (hash, p->prime, p->inv, p->shift); |
265 | 42.4M | } |
266 | | |
267 | | /* Compute the secondary hash for HASH given HTAB's current size. */ |
268 | | |
269 | | static inline hashval_t |
270 | | htab_mod_m2 (hashval_t hash, htab_t htab) |
271 | 8.30M | { |
272 | 8.30M | const struct prime_ent *p = &prime_tab[htab->size_prime_index]; |
273 | 8.30M | return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); |
274 | 8.30M | } |
275 | | |
276 | | /* This function creates table with length slightly longer than given |
277 | | source length. Created hash table is initiated as empty (all the |
278 | | hash table entries are HTAB_EMPTY_ENTRY). The function returns the |
279 | | created hash table, or NULL if memory allocation fails. */ |
280 | | |
281 | | htab_t |
282 | | htab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, |
283 | | htab_del del_f, htab_alloc alloc_f, htab_free free_f) |
284 | 1.11M | { |
285 | 1.11M | return htab_create_typed_alloc (size, hash_f, eq_f, del_f, alloc_f, alloc_f, |
286 | 1.11M | free_f); |
287 | 1.11M | } |
288 | | |
289 | | /* As above, but uses the variants of ALLOC_F and FREE_F which accept |
290 | | an extra argument. */ |
291 | | |
292 | | htab_t |
293 | | htab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f, |
294 | | htab_del del_f, void *alloc_arg, |
295 | | htab_alloc_with_arg alloc_f, |
296 | | htab_free_with_arg free_f) |
297 | 0 | { |
298 | 0 | htab_t result; |
299 | 0 | unsigned int size_prime_index; |
300 | |
|
301 | 0 | size_prime_index = higher_prime_index (size); |
302 | 0 | size = prime_tab[size_prime_index].prime; |
303 | |
|
304 | 0 | result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab)); |
305 | 0 | if (result == NULL) |
306 | 0 | return NULL; |
307 | 0 | result->entries = (void **) (*alloc_f) (alloc_arg, size, sizeof (void *)); |
308 | 0 | if (result->entries == NULL) |
309 | 0 | { |
310 | 0 | if (free_f != NULL) |
311 | 0 | (*free_f) (alloc_arg, result); |
312 | 0 | return NULL; |
313 | 0 | } |
314 | 0 | result->size = size; |
315 | 0 | result->size_prime_index = size_prime_index; |
316 | 0 | result->hash_f = hash_f; |
317 | 0 | result->eq_f = eq_f; |
318 | 0 | result->del_f = del_f; |
319 | 0 | result->alloc_arg = alloc_arg; |
320 | 0 | result->alloc_with_arg_f = alloc_f; |
321 | 0 | result->free_with_arg_f = free_f; |
322 | 0 | return result; |
323 | 0 | } |
324 | | |
325 | | /* |
326 | | |
327 | | @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @ |
328 | | htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @ |
329 | | htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @ |
330 | | htab_free @var{free_f}) |
331 | | |
332 | | This function creates a hash table that uses two different allocators |
333 | | @var{alloc_tab_f} and @var{alloc_f} to use for allocating the table itself |
334 | | and its entries respectively. This is useful when variables of different |
335 | | types need to be allocated with different allocators. |
336 | | |
337 | | The created hash table is slightly larger than @var{size} and it is |
338 | | initially empty (all the hash table entries are @code{HTAB_EMPTY_ENTRY}). |
339 | | The function returns the created hash table, or @code{NULL} if memory |
340 | | allocation fails. |
341 | | |
342 | | @end deftypefn |
343 | | |
344 | | */ |
345 | | |
346 | | htab_t |
347 | | htab_create_typed_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, |
348 | | htab_del del_f, htab_alloc alloc_tab_f, |
349 | | htab_alloc alloc_f, htab_free free_f) |
350 | 1.11M | { |
351 | 1.11M | htab_t result; |
352 | 1.11M | unsigned int size_prime_index; |
353 | | |
354 | 1.11M | size_prime_index = higher_prime_index (size); |
355 | 1.11M | size = prime_tab[size_prime_index].prime; |
356 | | |
357 | 1.11M | result = (htab_t) (*alloc_tab_f) (1, sizeof (struct htab)); |
358 | 1.11M | if (result == NULL) |
359 | 0 | return NULL; |
360 | 1.11M | result->entries = (void **) (*alloc_f) (size, sizeof (void *)); |
361 | 1.11M | if (result->entries == NULL) |
362 | 0 | { |
363 | 0 | if (free_f != NULL) |
364 | 0 | (*free_f) (result); |
365 | 0 | return NULL; |
366 | 0 | } |
367 | 1.11M | result->size = size; |
368 | 1.11M | result->size_prime_index = size_prime_index; |
369 | 1.11M | result->hash_f = hash_f; |
370 | 1.11M | result->eq_f = eq_f; |
371 | 1.11M | result->del_f = del_f; |
372 | 1.11M | result->alloc_f = alloc_f; |
373 | 1.11M | result->free_f = free_f; |
374 | 1.11M | return result; |
375 | 1.11M | } |
376 | | |
377 | | |
378 | | /* Update the function pointers and allocation parameter in the htab_t. */ |
379 | | |
380 | | void |
381 | | htab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f, |
382 | | htab_del del_f, void *alloc_arg, |
383 | | htab_alloc_with_arg alloc_f, htab_free_with_arg free_f) |
384 | 0 | { |
385 | 0 | htab->hash_f = hash_f; |
386 | 0 | htab->eq_f = eq_f; |
387 | 0 | htab->del_f = del_f; |
388 | 0 | htab->alloc_arg = alloc_arg; |
389 | 0 | htab->alloc_with_arg_f = alloc_f; |
390 | 0 | htab->free_with_arg_f = free_f; |
391 | 0 | } |
392 | | |
393 | | /* These functions exist solely for backward compatibility. */ |
394 | | |
395 | | #undef htab_create |
396 | | htab_t |
397 | | htab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) |
398 | 1.05M | { |
399 | 1.05M | return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free); |
400 | 1.05M | } |
401 | | |
402 | | htab_t |
403 | | htab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) |
404 | 0 | { |
405 | 0 | return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free); |
406 | 0 | } |
407 | | |
408 | | /* This function frees all memory allocated for given hash table. |
409 | | Naturally the hash table must already exist. */ |
410 | | |
411 | | void |
412 | | htab_delete (htab_t htab) |
413 | 1.11M | { |
414 | 1.11M | size_t size = htab_size (htab); |
415 | 1.11M | void **entries = htab->entries; |
416 | 1.11M | int i; |
417 | | |
418 | 1.11M | if (htab->del_f) |
419 | 17.5M | for (i = size - 1; i >= 0; i--) |
420 | 16.6M | if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) |
421 | 4.19M | (*htab->del_f) (entries[i]); |
422 | | |
423 | 1.11M | if (htab->free_f != NULL) |
424 | 1.11M | { |
425 | 1.11M | (*htab->free_f) (entries); |
426 | 1.11M | (*htab->free_f) (htab); |
427 | 1.11M | } |
428 | 1.48k | else if (htab->free_with_arg_f != NULL) |
429 | 0 | { |
430 | 0 | (*htab->free_with_arg_f) (htab->alloc_arg, entries); |
431 | 0 | (*htab->free_with_arg_f) (htab->alloc_arg, htab); |
432 | 0 | } |
433 | 1.11M | } |
434 | | |
435 | | /* This function clears all entries in the given hash table. */ |
436 | | |
437 | | void |
438 | | htab_empty (htab_t htab) |
439 | 0 | { |
440 | 0 | size_t size = htab_size (htab); |
441 | 0 | void **entries = htab->entries; |
442 | 0 | int i; |
443 | |
|
444 | 0 | if (htab->del_f) |
445 | 0 | for (i = size - 1; i >= 0; i--) |
446 | 0 | if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) |
447 | 0 | (*htab->del_f) (entries[i]); |
448 | | |
449 | | /* Instead of clearing megabyte, downsize the table. */ |
450 | 0 | if (size > 1024*1024 / sizeof (void *)) |
451 | 0 | { |
452 | 0 | int nindex = higher_prime_index (1024 / sizeof (void *)); |
453 | 0 | int nsize = prime_tab[nindex].prime; |
454 | |
|
455 | 0 | if (htab->free_f != NULL) |
456 | 0 | (*htab->free_f) (htab->entries); |
457 | 0 | else if (htab->free_with_arg_f != NULL) |
458 | 0 | (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries); |
459 | 0 | if (htab->alloc_with_arg_f != NULL) |
460 | 0 | htab->entries = (void **) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, |
461 | 0 | sizeof (void *)); |
462 | 0 | else |
463 | 0 | htab->entries = (void **) (*htab->alloc_f) (nsize, sizeof (void *)); |
464 | 0 | htab->size = nsize; |
465 | 0 | htab->size_prime_index = nindex; |
466 | 0 | } |
467 | 0 | else |
468 | 0 | memset (entries, 0, size * sizeof (void *)); |
469 | 0 | htab->n_deleted = 0; |
470 | 0 | htab->n_elements = 0; |
471 | 0 | } |
472 | | |
473 | | /* Similar to htab_find_slot, but without several unwanted side effects: |
474 | | - Does not call htab->eq_f when it finds an existing entry. |
475 | | - Does not change the count of elements/searches/collisions in the |
476 | | hash table. |
477 | | This function also assumes there are no deleted entries in the table. |
478 | | HASH is the hash value for the element to be inserted. */ |
479 | | |
480 | | static void ** |
481 | | find_empty_slot_for_expand (htab_t htab, hashval_t hash) |
482 | 7.36M | { |
483 | 7.36M | hashval_t index = htab_mod (hash, htab); |
484 | 7.36M | size_t size = htab_size (htab); |
485 | 7.36M | void **slot = htab->entries + index; |
486 | 7.36M | hashval_t hash2; |
487 | | |
488 | 7.36M | if (*slot == HTAB_EMPTY_ENTRY) |
489 | 6.83M | return slot; |
490 | 533k | else if (*slot == HTAB_DELETED_ENTRY) |
491 | 0 | abort (); |
492 | | |
493 | 533k | hash2 = htab_mod_m2 (hash, htab); |
494 | 533k | for (;;) |
495 | 723k | { |
496 | 723k | index += hash2; |
497 | 723k | if (index >= size) |
498 | 291k | index -= size; |
499 | | |
500 | 723k | slot = htab->entries + index; |
501 | 723k | if (*slot == HTAB_EMPTY_ENTRY) |
502 | 533k | return slot; |
503 | 190k | else if (*slot == HTAB_DELETED_ENTRY) |
504 | 0 | abort (); |
505 | 723k | } |
506 | 533k | } |
507 | | |
508 | | /* The following function changes size of memory allocated for the |
509 | | entries and repeatedly inserts the table elements. The occupancy |
510 | | of the table after the call will be about 50%. Naturally the hash |
511 | | table must already exist. Remember also that the place of the |
512 | | table entries is changed. If memory allocation failures are allowed, |
513 | | this function will return zero, indicating that the table could not be |
514 | | expanded. If all goes well, it will return a non-zero value. */ |
515 | | |
516 | | static int |
517 | | htab_expand (htab_t htab) |
518 | 227k | { |
519 | 227k | void **oentries; |
520 | 227k | void **olimit; |
521 | 227k | void **p; |
522 | 227k | void **nentries; |
523 | 227k | size_t nsize, osize, elts; |
524 | 227k | unsigned int oindex, nindex; |
525 | | |
526 | 227k | oentries = htab->entries; |
527 | 227k | oindex = htab->size_prime_index; |
528 | 227k | osize = htab->size; |
529 | 227k | olimit = oentries + osize; |
530 | 227k | elts = htab_elements (htab); |
531 | | |
532 | | /* Resize only when table after removal of unused elements is either |
533 | | too full or too empty. */ |
534 | 227k | if (elts * 2 > osize || (elts * 8 < osize && osize > 32)) |
535 | 227k | { |
536 | 227k | nindex = higher_prime_index (elts * 2); |
537 | 227k | nsize = prime_tab[nindex].prime; |
538 | 227k | } |
539 | 134 | else |
540 | 134 | { |
541 | 134 | nindex = oindex; |
542 | 134 | nsize = osize; |
543 | 134 | } |
544 | | |
545 | 227k | if (htab->alloc_with_arg_f != NULL) |
546 | 0 | nentries = (void **) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, |
547 | 0 | sizeof (void *)); |
548 | 227k | else |
549 | 227k | nentries = (void **) (*htab->alloc_f) (nsize, sizeof (void *)); |
550 | 227k | if (nentries == NULL) |
551 | 0 | return 0; |
552 | 227k | htab->entries = nentries; |
553 | 227k | htab->size = nsize; |
554 | 227k | htab->size_prime_index = nindex; |
555 | 227k | htab->n_elements -= htab->n_deleted; |
556 | 227k | htab->n_deleted = 0; |
557 | | |
558 | 227k | p = oentries; |
559 | 227k | do |
560 | 9.70M | { |
561 | 9.70M | void *x = *p; |
562 | | |
563 | 9.70M | if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) |
564 | 7.36M | { |
565 | 7.36M | void **q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x)); |
566 | | |
567 | 7.36M | *q = x; |
568 | 7.36M | } |
569 | | |
570 | 9.70M | p++; |
571 | 9.70M | } |
572 | 9.70M | while (p < olimit); |
573 | | |
574 | 227k | if (htab->free_f != NULL) |
575 | 226k | (*htab->free_f) (oentries); |
576 | 420 | else if (htab->free_with_arg_f != NULL) |
577 | 0 | (*htab->free_with_arg_f) (htab->alloc_arg, oentries); |
578 | 227k | return 1; |
579 | 227k | } |
580 | | |
581 | | /* This function searches for a hash table entry equal to the given |
582 | | element. It cannot be used to insert or delete an element. */ |
583 | | |
584 | | void * |
585 | | htab_find_with_hash (htab_t htab, const void *element, hashval_t hash) |
586 | 13.6M | { |
587 | 13.6M | hashval_t index, hash2; |
588 | 13.6M | size_t size; |
589 | 13.6M | void *entry; |
590 | | |
591 | 13.6M | htab->searches++; |
592 | 13.6M | size = htab_size (htab); |
593 | 13.6M | index = htab_mod (hash, htab); |
594 | | |
595 | 13.6M | entry = htab->entries[index]; |
596 | 13.6M | if (entry == HTAB_EMPTY_ENTRY |
597 | 13.6M | || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) |
598 | 8.27M | return entry; |
599 | | |
600 | 5.35M | hash2 = htab_mod_m2 (hash, htab); |
601 | 5.35M | for (;;) |
602 | 50.6M | { |
603 | 50.6M | htab->collisions++; |
604 | 50.6M | index += hash2; |
605 | 50.6M | if (index >= size) |
606 | 2.36M | index -= size; |
607 | | |
608 | 50.6M | entry = htab->entries[index]; |
609 | 50.6M | if (entry == HTAB_EMPTY_ENTRY |
610 | 50.6M | || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) |
611 | 5.35M | return entry; |
612 | 50.6M | } |
613 | 5.35M | } |
614 | | |
615 | | /* Like htab_find_slot_with_hash, but compute the hash value from the |
616 | | element. */ |
617 | | |
618 | | void * |
619 | | htab_find (htab_t htab, const void *element) |
620 | 13.4M | { |
621 | 13.4M | return htab_find_with_hash (htab, element, (*htab->hash_f) (element)); |
622 | 13.4M | } |
623 | | |
624 | | /* This function searches for a hash table slot containing an entry |
625 | | equal to the given element. To delete an entry, call this with |
626 | | insert=NO_INSERT, then call htab_clear_slot on the slot returned |
627 | | (possibly after doing some checks). To insert an entry, call this |
628 | | with insert=INSERT, then write the value you want into the returned |
629 | | slot. When inserting an entry, NULL may be returned if memory |
630 | | allocation fails. */ |
631 | | |
632 | | void ** |
633 | | htab_find_slot_with_hash (htab_t htab, const void *element, |
634 | | hashval_t hash, enum insert_option insert) |
635 | 21.4M | { |
636 | 21.4M | void **first_deleted_slot; |
637 | 21.4M | hashval_t index, hash2; |
638 | 21.4M | size_t size; |
639 | 21.4M | void *entry; |
640 | | |
641 | 21.4M | size = htab_size (htab); |
642 | 21.4M | if (insert == INSERT && size * 3 <= htab->n_elements * 4) |
643 | 227k | { |
644 | 227k | if (htab_expand (htab) == 0) |
645 | 0 | return NULL; |
646 | 227k | size = htab_size (htab); |
647 | 227k | } |
648 | | |
649 | 21.4M | index = htab_mod (hash, htab); |
650 | | |
651 | 21.4M | htab->searches++; |
652 | 21.4M | first_deleted_slot = NULL; |
653 | | |
654 | 21.4M | entry = htab->entries[index]; |
655 | 21.4M | if (entry == HTAB_EMPTY_ENTRY) |
656 | 5.99M | goto empty_entry; |
657 | 15.4M | else if (entry == HTAB_DELETED_ENTRY) |
658 | 127 | first_deleted_slot = &htab->entries[index]; |
659 | 15.4M | else if ((*htab->eq_f) (entry, element)) |
660 | 12.9M | return &htab->entries[index]; |
661 | | |
662 | 2.42M | hash2 = htab_mod_m2 (hash, htab); |
663 | 2.42M | for (;;) |
664 | 4.88M | { |
665 | 4.88M | htab->collisions++; |
666 | 4.88M | index += hash2; |
667 | 4.88M | if (index >= size) |
668 | 2.00M | index -= size; |
669 | | |
670 | 4.88M | entry = htab->entries[index]; |
671 | 4.88M | if (entry == HTAB_EMPTY_ENTRY) |
672 | 1.64M | goto empty_entry; |
673 | 3.24M | else if (entry == HTAB_DELETED_ENTRY) |
674 | 37 | { |
675 | 37 | if (!first_deleted_slot) |
676 | 8 | first_deleted_slot = &htab->entries[index]; |
677 | 37 | } |
678 | 3.24M | else if ((*htab->eq_f) (entry, element)) |
679 | 778k | return &htab->entries[index]; |
680 | 4.88M | } |
681 | | |
682 | 7.63M | empty_entry: |
683 | 7.63M | if (insert == NO_INSERT) |
684 | 0 | return NULL; |
685 | | |
686 | 7.63M | if (first_deleted_slot) |
687 | 70 | { |
688 | 70 | htab->n_deleted--; |
689 | 70 | *first_deleted_slot = HTAB_EMPTY_ENTRY; |
690 | 70 | return first_deleted_slot; |
691 | 70 | } |
692 | | |
693 | 7.63M | htab->n_elements++; |
694 | 7.63M | return &htab->entries[index]; |
695 | 7.63M | } |
696 | | |
697 | | /* Like htab_find_slot_with_hash, but compute the hash value from the |
698 | | element. */ |
699 | | |
700 | | void ** |
701 | | htab_find_slot (htab_t htab, const void *element, enum insert_option insert) |
702 | 21.4M | { |
703 | 21.4M | return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element), |
704 | 21.4M | insert); |
705 | 21.4M | } |
706 | | |
707 | | /* This function deletes an element with the given value from hash |
708 | | table (the hash is computed from the element). If there is no matching |
709 | | element in the hash table, this function does nothing. */ |
710 | | |
711 | | void |
712 | | htab_remove_elt (htab_t htab, const void *element) |
713 | 12 | { |
714 | 12 | htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element)); |
715 | 12 | } |
716 | | |
717 | | |
718 | | /* This function deletes an element with the given value from hash |
719 | | table. If there is no matching element in the hash table, this |
720 | | function does nothing. */ |
721 | | |
722 | | void |
723 | | htab_remove_elt_with_hash (htab_t htab, const void *element, hashval_t hash) |
724 | 12 | { |
725 | 12 | void **slot; |
726 | | |
727 | 12 | slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT); |
728 | 12 | if (slot == NULL) |
729 | 0 | return; |
730 | | |
731 | 12 | if (htab->del_f) |
732 | 12 | (*htab->del_f) (*slot); |
733 | | |
734 | 12 | *slot = HTAB_DELETED_ENTRY; |
735 | 12 | htab->n_deleted++; |
736 | 12 | } |
737 | | |
738 | | /* This function clears a specified slot in a hash table. It is |
739 | | useful when you've already done the lookup and don't want to do it |
740 | | again. */ |
741 | | |
742 | | void |
743 | | htab_clear_slot (htab_t htab, void **slot) |
744 | 14.1k | { |
745 | 14.1k | if (slot < htab->entries || slot >= htab->entries + htab_size (htab) |
746 | 14.1k | || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY) |
747 | 0 | abort (); |
748 | | |
749 | 14.1k | if (htab->del_f) |
750 | 0 | (*htab->del_f) (*slot); |
751 | | |
752 | 14.1k | *slot = HTAB_DELETED_ENTRY; |
753 | 14.1k | htab->n_deleted++; |
754 | 14.1k | } |
755 | | |
756 | | /* This function scans over the entire hash table calling |
757 | | CALLBACK for each live entry. If CALLBACK returns false, |
758 | | the iteration stops. INFO is passed as CALLBACK's second |
759 | | argument. */ |
760 | | |
761 | | void |
762 | | htab_traverse_noresize (htab_t htab, htab_trav callback, void *info) |
763 | 7.62k | { |
764 | 7.62k | void **slot; |
765 | 7.62k | void **limit; |
766 | | |
767 | 7.62k | slot = htab->entries; |
768 | 7.62k | limit = slot + htab_size (htab); |
769 | | |
770 | 7.62k | do |
771 | 236k | { |
772 | 236k | void *x = *slot; |
773 | | |
774 | 236k | if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) |
775 | 0 | if (!(*callback) (slot, info)) |
776 | 0 | break; |
777 | 236k | } |
778 | 236k | while (++slot < limit); |
779 | 7.62k | } |
780 | | |
781 | | /* Like htab_traverse_noresize, but does resize the table when it is |
782 | | too empty to improve effectivity of subsequent calls. */ |
783 | | |
784 | | void |
785 | | htab_traverse (htab_t htab, htab_trav callback, void *info) |
786 | 0 | { |
787 | 0 | size_t size = htab_size (htab); |
788 | 0 | if (htab_elements (htab) * 8 < size && size > 32) |
789 | 0 | htab_expand (htab); |
790 | |
|
791 | 0 | htab_traverse_noresize (htab, callback, info); |
792 | 0 | } |
793 | | |
794 | | /* Return the fraction of fixed collisions during all work with given |
795 | | hash table. */ |
796 | | |
797 | | double |
798 | | htab_collisions (htab_t htab) |
799 | 0 | { |
800 | 0 | if (htab->searches == 0) |
801 | 0 | return 0.0; |
802 | | |
803 | 0 | return (double) htab->collisions / (double) htab->searches; |
804 | 0 | } |
805 | | |
806 | | /* Hash P as a null-terminated string. |
807 | | |
808 | | Copied from gcc/hashtable.c. Zack had the following to say with respect |
809 | | to applicability, though note that unlike hashtable.c, this hash table |
810 | | implementation re-hashes rather than chain buckets. |
811 | | |
812 | | http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html |
813 | | From: Zack Weinberg <zackw@panix.com> |
814 | | Date: Fri, 17 Aug 2001 02:15:56 -0400 |
815 | | |
816 | | I got it by extracting all the identifiers from all the source code |
817 | | I had lying around in mid-1999, and testing many recurrences of |
818 | | the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either |
819 | | prime numbers or the appropriate identity. This was the best one. |
820 | | I don't remember exactly what constituted "best", except I was |
821 | | looking at bucket-length distributions mostly. |
822 | | |
823 | | So it should be very good at hashing identifiers, but might not be |
824 | | as good at arbitrary strings. |
825 | | |
826 | | I'll add that it thoroughly trounces the hash functions recommended |
827 | | for this use at http://burtleburtle.net/bob/hash/index.html, both |
828 | | on speed and bucket distribution. I haven't tried it against the |
829 | | function they just started using for Perl's hashes. */ |
830 | | |
831 | | hashval_t |
832 | | htab_hash_string (const void *p) |
833 | 1.13M | { |
834 | 1.13M | const unsigned char *str = (const unsigned char *) p; |
835 | 1.13M | hashval_t r = 0; |
836 | 1.13M | unsigned char c; |
837 | | |
838 | 12.2M | while ((c = *str++) != 0) |
839 | 11.1M | r = r * 67 + c - 113; |
840 | | |
841 | 1.13M | return r; |
842 | 1.13M | } |
843 | | |
844 | | /* An equality function for null-terminated strings. */ |
845 | | int |
846 | | htab_eq_string (const void *a, const void *b) |
847 | 0 | { |
848 | 0 | return strcmp ((const char *) a, (const char *) b) == 0; |
849 | 0 | } |
850 | | |
851 | | /* DERIVED FROM: |
852 | | -------------------------------------------------------------------- |
853 | | lookup2.c, by Bob Jenkins, December 1996, Public Domain. |
854 | | hash(), hash2(), hash3, and mix() are externally useful functions. |
855 | | Routines to test the hash are included if SELF_TEST is defined. |
856 | | You can use this free for any purpose. It has no warranty. |
857 | | -------------------------------------------------------------------- |
858 | | */ |
859 | | |
860 | | /* |
861 | | -------------------------------------------------------------------- |
862 | | mix -- mix 3 32-bit values reversibly. |
863 | | For every delta with one or two bit set, and the deltas of all three |
864 | | high bits or all three low bits, whether the original value of a,b,c |
865 | | is almost all zero or is uniformly distributed, |
866 | | * If mix() is run forward or backward, at least 32 bits in a,b,c |
867 | | have at least 1/4 probability of changing. |
868 | | * If mix() is run forward, every bit of c will change between 1/3 and |
869 | | 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) |
870 | | mix() was built out of 36 single-cycle latency instructions in a |
871 | | structure that could supported 2x parallelism, like so: |
872 | | a -= b; |
873 | | a -= c; x = (c>>13); |
874 | | b -= c; a ^= x; |
875 | | b -= a; x = (a<<8); |
876 | | c -= a; b ^= x; |
877 | | c -= b; x = (b>>13); |
878 | | ... |
879 | | Unfortunately, superscalar Pentiums and Sparcs can't take advantage |
880 | | of that parallelism. They've also turned some of those single-cycle |
881 | | latency instructions into multi-cycle latency instructions. Still, |
882 | | this is the fastest good hash I could find. There were about 2^^68 |
883 | | to choose from. I only looked at a billion or so. |
884 | | -------------------------------------------------------------------- |
885 | | */ |
886 | | /* same, but slower, works on systems that might have 8 byte hashval_t's */ |
887 | 17.6k | #define mix(a,b,c) \ |
888 | 17.6k | { \ |
889 | 17.6k | a -= b; a -= c; a ^= (c>>13); \ |
890 | 17.6k | b -= c; b -= a; b ^= (a<< 8); \ |
891 | 17.6k | c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \ |
892 | 17.6k | a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \ |
893 | 17.6k | b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \ |
894 | 17.6k | c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \ |
895 | 17.6k | a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \ |
896 | 17.6k | b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \ |
897 | 17.6k | c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \ |
898 | 17.6k | } |
899 | | |
900 | | /* |
901 | | -------------------------------------------------------------------- |
902 | | hash() -- hash a variable-length key into a 32-bit value |
903 | | k : the key (the unaligned variable-length array of bytes) |
904 | | len : the length of the key, counting by bytes |
905 | | level : can be any 4-byte value |
906 | | Returns a 32-bit value. Every bit of the key affects every bit of |
907 | | the return value. Every 1-bit and 2-bit delta achieves avalanche. |
908 | | About 36+6len instructions. |
909 | | |
910 | | The best hash table sizes are powers of 2. There is no need to do |
911 | | mod a prime (mod is sooo slow!). If you need less than 32 bits, |
912 | | use a bitmask. For example, if you need only 10 bits, do |
913 | | h = (h & hashmask(10)); |
914 | | In which case, the hash table should have hashsize(10) elements. |
915 | | |
916 | | If you are hashing n strings (ub1 **)k, do it like this: |
917 | | for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); |
918 | | |
919 | | By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this |
920 | | code any way you wish, private, educational, or commercial. It's free. |
921 | | |
922 | | See http://burtleburtle.net/bob/hash/evahash.html |
923 | | Use for hash table lookup, or anything where one collision in 2^32 is |
924 | | acceptable. Do NOT use for cryptographic purposes. |
925 | | -------------------------------------------------------------------- |
926 | | */ |
927 | | |
928 | | hashval_t |
929 | | iterative_hash (const void *k_in /* the key */, |
930 | | register size_t length /* the length of the key */, |
931 | | register hashval_t initval /* the previous hash, or |
932 | | an arbitrary value */) |
933 | 0 | { |
934 | 0 | register const unsigned char *k = (const unsigned char *)k_in; |
935 | 0 | register hashval_t a,b,c,len; |
936 | | |
937 | | /* Set up the internal state */ |
938 | 0 | len = length; |
939 | 0 | a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ |
940 | 0 | c = initval; /* the previous hash value */ |
941 | | |
942 | | /*---------------------------------------- handle most of the key */ |
943 | | /* Provide specialization for the aligned case for targets that cannot |
944 | | efficiently perform misaligned loads of a merged access. */ |
945 | 0 | if ((((size_t)k)&3) == 0) |
946 | 0 | while (len >= 12) |
947 | 0 | { |
948 | 0 | a += (k[0] | ((hashval_t)k[1]<<8) | ((hashval_t)k[2]<<16) | ((hashval_t)k[3]<<24)); |
949 | 0 | b += (k[4] | ((hashval_t)k[5]<<8) | ((hashval_t)k[6]<<16) | ((hashval_t)k[7]<<24)); |
950 | 0 | c += (k[8] | ((hashval_t)k[9]<<8) | ((hashval_t)k[10]<<16)| ((hashval_t)k[11]<<24)); |
951 | 0 | mix(a,b,c); |
952 | 0 | k += 12; len -= 12; |
953 | 0 | } |
954 | 0 | else /* unaligned */ |
955 | 0 | while (len >= 12) |
956 | 0 | { |
957 | 0 | a += (k[0] | ((hashval_t)k[1]<<8) | ((hashval_t)k[2]<<16) | ((hashval_t)k[3]<<24)); |
958 | 0 | b += (k[4] | ((hashval_t)k[5]<<8) | ((hashval_t)k[6]<<16) | ((hashval_t)k[7]<<24)); |
959 | 0 | c += (k[8] | ((hashval_t)k[9]<<8) | ((hashval_t)k[10]<<16)| ((hashval_t)k[11]<<24)); |
960 | 0 | mix(a,b,c); |
961 | 0 | k += 12; len -= 12; |
962 | 0 | } |
963 | | |
964 | | /*------------------------------------- handle the last 11 bytes */ |
965 | 0 | c += length; |
966 | 0 | switch(len) /* all the case statements fall through */ |
967 | 0 | { |
968 | 0 | case 11: c+=((hashval_t)k[10]<<24); /* fall through */ |
969 | 0 | case 10: c+=((hashval_t)k[9]<<16); /* fall through */ |
970 | 0 | case 9 : c+=((hashval_t)k[8]<<8); /* fall through */ |
971 | | /* the first byte of c is reserved for the length */ |
972 | 0 | case 8 : b+=((hashval_t)k[7]<<24); /* fall through */ |
973 | 0 | case 7 : b+=((hashval_t)k[6]<<16); /* fall through */ |
974 | 0 | case 6 : b+=((hashval_t)k[5]<<8); /* fall through */ |
975 | 0 | case 5 : b+=k[4]; /* fall through */ |
976 | 0 | case 4 : a+=((hashval_t)k[3]<<24); /* fall through */ |
977 | 0 | case 3 : a+=((hashval_t)k[2]<<16); /* fall through */ |
978 | 0 | case 2 : a+=((hashval_t)k[1]<<8); /* fall through */ |
979 | 0 | case 1 : a+=k[0]; |
980 | | /* case 0: nothing left to add */ |
981 | 0 | } |
982 | 0 | mix(a,b,c); |
983 | | /*-------------------------------------------- report the result */ |
984 | 0 | return c; |
985 | 0 | } |
986 | | |
987 | | /* Returns a hash code for pointer P. Simplified version of evahash */ |
988 | | |
989 | | static hashval_t |
990 | | hash_pointer (const void *p) |
991 | 17.6k | { |
992 | 17.6k | intptr_t v = (intptr_t) p; |
993 | 17.6k | unsigned a, b, c; |
994 | | |
995 | 17.6k | a = b = 0x9e3779b9; |
996 | 17.6k | a += v >> (sizeof (intptr_t) * CHAR_BIT / 2); |
997 | 17.6k | b += v & (((intptr_t) 1 << (sizeof (intptr_t) * CHAR_BIT / 2)) - 1); |
998 | 17.6k | c = 0x42135234; |
999 | 17.6k | mix (a, b, c); |
1000 | 17.6k | return c; |
1001 | 17.6k | } |