Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Hash Table Data Type |
3 | | * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> |
4 | | * |
5 | | * Free Software License: |
6 | | * |
7 | | * All rights are reserved by the author, with the following exceptions: |
8 | | * Permission is granted to freely reproduce and distribute this software, |
9 | | * possibly in exchange for a fee, provided that this copyright notice appears |
10 | | * intact. Permission is also granted to adapt this software to produce |
11 | | * derivative works, as long as the modified versions carry this copyright |
12 | | * notice and additional notices stating that the work has been modified. |
13 | | * This source code may be translated into executable form and incorporated |
14 | | * into proprietary software; there is no requirement for such software to |
15 | | * contain a copyright notice related to this source. |
16 | | * |
17 | | * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $ |
18 | | * $Name: kazlib_1_20 $ |
19 | | * |
20 | | * 2008-04-17 Small modifications to build with stricter warnings |
21 | | * David Lutterkort <dlutter@redhat.com> |
22 | | * |
23 | | */ |
24 | | |
25 | | #include <config.h> |
26 | | |
27 | | #include <stdlib.h> |
28 | | #include <stddef.h> |
29 | | #include <assert.h> |
30 | | #include <string.h> |
31 | | #define HASH_IMPLEMENTATION |
32 | | #include "internal.h" |
33 | | #include "hash.h" |
34 | | |
35 | | #ifdef HASH_DEBUG_VERIFY |
36 | | # define expensive_assert(expr) assert (expr) |
37 | | #else |
38 | | # define expensive_assert(expr) /* empty */ |
39 | | #endif |
40 | | |
41 | | #ifdef KAZLIB_RCSID |
42 | | static const char rcsid[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $"; |
43 | | #endif |
44 | | |
45 | 0 | #define INIT_BITS 4 |
46 | 0 | #define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */ |
47 | 0 | #define INIT_MASK ((INIT_SIZE) - 1) |
48 | | |
49 | 0 | #define next hash_next |
50 | 0 | #define key hash_key |
51 | 0 | #define data hash_data |
52 | 0 | #define hkey hash_hkey |
53 | | |
54 | | #define table hash_table |
55 | 0 | #define nchains hash_nchains |
56 | 0 | #define nodecount hash_nodecount |
57 | 0 | #define maxcount hash_maxcount |
58 | 0 | #define highmark hash_highmark |
59 | 0 | #define lowmark hash_lowmark |
60 | 0 | #define compare hash_compare |
61 | 0 | #define function hash_function |
62 | 0 | #define allocnode hash_allocnode |
63 | 0 | #define freenode hash_freenode |
64 | 0 | #define context hash_context |
65 | 0 | #define mask hash_mask |
66 | 0 | #define dynamic hash_dynamic |
67 | | |
68 | 0 | #define table hash_table |
69 | 0 | #define chain hash_chain |
70 | | |
71 | | static hnode_t *hnode_alloc(void *context); |
72 | | static void hnode_free(hnode_t *node, void *context); |
73 | | static hash_val_t hash_fun_default(const void *key); |
74 | | static int hash_comp_default(const void *key1, const void *key2); |
75 | | |
76 | | int hash_val_t_bit; |
77 | | |
78 | | /* |
79 | | * Compute the number of bits in the hash_val_t type. We know that hash_val_t |
80 | | * is an unsigned integral type. Thus the highest value it can hold is a |
81 | | * Mersenne number (power of two, less one). We initialize a hash_val_t |
82 | | * object with this value and then shift bits out one by one while counting. |
83 | | * Notes: |
84 | | * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power |
85 | | * of two. This means that its binary representation consists of all one |
86 | | * bits, and hence ``val'' is initialized to all one bits. |
87 | | * 2. While bits remain in val, we increment the bit count and shift it to the |
88 | | * right, replacing the topmost bit by zero. |
89 | | */ |
90 | | |
91 | | static void compute_bits(void) |
92 | 0 | { |
93 | 0 | hash_val_t val = HASH_VAL_T_MAX; /* 1 */ |
94 | 0 | int bits = 0; |
95 | |
|
96 | 0 | while (val) { /* 2 */ |
97 | 0 | bits++; |
98 | 0 | val >>= 1; |
99 | 0 | } |
100 | |
|
101 | 0 | hash_val_t_bit = bits; |
102 | 0 | } |
103 | | |
104 | | /* |
105 | | * Verify whether the given argument is a power of two. |
106 | | */ |
107 | | |
108 | | static int is_power_of_two(hash_val_t arg) |
109 | 0 | { |
110 | 0 | if (arg == 0) |
111 | 0 | return 0; |
112 | 0 | while ((arg & 1) == 0) |
113 | 0 | arg >>= 1; |
114 | 0 | return (arg == 1); |
115 | 0 | } |
116 | | |
117 | | /* |
118 | | * Compute a shift amount from a given table size |
119 | | */ |
120 | | |
121 | | static hash_val_t compute_mask(hashcount_t size) |
122 | 0 | { |
123 | 0 | assert (is_power_of_two(size)); |
124 | 0 | assert (size >= 2); |
125 | | |
126 | 0 | return size - 1; |
127 | 0 | } |
128 | | |
129 | | /* |
130 | | * Initialize the table of pointers to null. |
131 | | */ |
132 | | |
133 | | static void clear_table(hash_t *hash) |
134 | 0 | { |
135 | 0 | hash_val_t i; |
136 | |
|
137 | 0 | for (i = 0; i < hash->nchains; i++) |
138 | 0 | hash->table[i] = NULL; |
139 | 0 | } |
140 | | |
141 | | /* |
142 | | * Double the size of a dynamic table. This works as follows. Each chain splits |
143 | | * into two adjacent chains. The shift amount increases by one, exposing an |
144 | | * additional bit of each hashed key. For each node in the original chain, the |
145 | | * value of this newly exposed bit will decide which of the two new chains will |
146 | | * receive the node: if the bit is 1, the chain with the higher index will have |
147 | | * the node, otherwise the lower chain will receive the node. In this manner, |
148 | | * the hash table will continue to function exactly as before without having to |
149 | | * rehash any of the keys. |
150 | | * Notes: |
151 | | * 1. Overflow check. |
152 | | * 2. The new number of chains is twice the old number of chains. |
153 | | * 3. The new mask is one bit wider than the previous, revealing a |
154 | | * new bit in all hashed keys. |
155 | | * 4. Allocate a new table of chain pointers that is twice as large as the |
156 | | * previous one. |
157 | | * 5. If the reallocation was successful, we perform the rest of the growth |
158 | | * algorithm, otherwise we do nothing. |
159 | | * 6. The exposed_bit variable holds a mask with which each hashed key can be |
160 | | * AND-ed to test the value of its newly exposed bit. |
161 | | * 7. Now loop over each chain in the table and sort its nodes into two |
162 | | * chains based on the value of each node's newly exposed hash bit. |
163 | | * 8. The low chain replaces the current chain. The high chain goes |
164 | | * into the corresponding sister chain in the upper half of the table. |
165 | | * 9. We have finished dealing with the chains and nodes. We now update |
166 | | * the various bookeeping fields of the hash structure. |
167 | | */ |
168 | | |
169 | | static void grow_table(hash_t *hash) |
170 | 0 | { |
171 | 0 | hnode_t **newtable; |
172 | |
|
173 | 0 | assert (2 * hash->nchains > hash->nchains); /* 1 */ |
174 | | |
175 | 0 | newtable = realloc(hash->table, |
176 | 0 | sizeof *newtable * hash->nchains * 2); /* 4 */ |
177 | |
|
178 | 0 | if (newtable) { /* 5 */ |
179 | 0 | hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ |
180 | 0 | hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ |
181 | 0 | hash_val_t chain; |
182 | |
|
183 | 0 | assert (mask != hash->mask); |
184 | | |
185 | 0 | for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ |
186 | 0 | hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next; |
187 | |
|
188 | 0 | for (hptr = newtable[chain]; hptr != 0; hptr = next) { |
189 | 0 | next = hptr->next; |
190 | |
|
191 | 0 | if (hptr->hkey & exposed_bit) { |
192 | 0 | hptr->next = high_chain; |
193 | 0 | high_chain = hptr; |
194 | 0 | } else { |
195 | 0 | hptr->next = low_chain; |
196 | 0 | low_chain = hptr; |
197 | 0 | } |
198 | 0 | } |
199 | |
|
200 | 0 | newtable[chain] = low_chain; /* 8 */ |
201 | 0 | newtable[chain + hash->nchains] = high_chain; |
202 | 0 | } |
203 | |
|
204 | 0 | hash->table = newtable; /* 9 */ |
205 | 0 | hash->mask = mask; |
206 | 0 | hash->nchains *= 2; |
207 | 0 | hash->lowmark *= 2; |
208 | 0 | hash->highmark *= 2; |
209 | 0 | } |
210 | 0 | expensive_assert (hash_verify(hash)); |
211 | 0 | } |
212 | | |
213 | | /* |
214 | | * Cut a table size in half. This is done by folding together adjacent chains |
215 | | * and populating the lower half of the table with these chains. The chains are |
216 | | * simply spliced together. Once this is done, the whole table is reallocated |
217 | | * to a smaller object. |
218 | | * Notes: |
219 | | * 1. It is illegal to have a hash table with one slot. This would mean that |
220 | | * hash->shift is equal to hash_val_t_bit, an illegal shift value. |
221 | | * Also, other things could go wrong, such as hash->lowmark becoming zero. |
222 | | * 2. Looping over each pair of sister chains, the low_chain is set to |
223 | | * point to the head node of the chain in the lower half of the table, |
224 | | * and high_chain points to the head node of the sister in the upper half. |
225 | | * 3. The intent here is to compute a pointer to the last node of the |
226 | | * lower chain into the low_tail variable. If this chain is empty, |
227 | | * low_tail ends up with a null value. |
228 | | * 4. If the lower chain is not empty, we simply tack the upper chain onto it. |
229 | | * If the upper chain is a null pointer, nothing happens. |
230 | | * 5. Otherwise if the lower chain is empty but the upper one is not, |
231 | | * If the low chain is empty, but the high chain is not, then the |
232 | | * high chain is simply transferred to the lower half of the table. |
233 | | * 6. Otherwise if both chains are empty, there is nothing to do. |
234 | | * 7. All the chain pointers are in the lower half of the table now, so |
235 | | * we reallocate it to a smaller object. This, of course, invalidates |
236 | | * all pointer-to-pointers which reference into the table from the |
237 | | * first node of each chain. |
238 | | * 8. Though it's unlikely, the reallocation may fail. In this case we |
239 | | * pretend that the table _was_ reallocated to a smaller object. |
240 | | * 9. Finally, update the various table parameters to reflect the new size. |
241 | | */ |
242 | | |
243 | | static void shrink_table(hash_t *hash) |
244 | 0 | { |
245 | 0 | hash_val_t chain, nchains; |
246 | 0 | hnode_t **newtable, *low_tail, *low_chain, *high_chain; |
247 | |
|
248 | 0 | assert (hash->nchains >= 2); /* 1 */ |
249 | 0 | nchains = hash->nchains / 2; |
250 | |
|
251 | 0 | for (chain = 0; chain < nchains; chain++) { |
252 | 0 | low_chain = hash->table[chain]; /* 2 */ |
253 | 0 | high_chain = hash->table[chain + nchains]; |
254 | 0 | for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next) |
255 | 0 | ; /* 3 */ |
256 | 0 | if (low_chain != 0) /* 4 */ |
257 | 0 | low_tail->next = high_chain; |
258 | 0 | else if (high_chain != 0) /* 5 */ |
259 | 0 | hash->table[chain] = high_chain; |
260 | 0 | else |
261 | 0 | assert (hash->table[chain] == NULL); /* 6 */ |
262 | 0 | } |
263 | 0 | newtable = realloc(hash->table, |
264 | 0 | sizeof *newtable * nchains); /* 7 */ |
265 | 0 | if (newtable) /* 8 */ |
266 | 0 | hash->table = newtable; |
267 | 0 | hash->mask >>= 1; /* 9 */ |
268 | 0 | hash->nchains = nchains; |
269 | 0 | hash->lowmark /= 2; |
270 | 0 | hash->highmark /= 2; |
271 | 0 | expensive_assert (hash_verify(hash)); |
272 | 0 | } |
273 | | |
274 | | |
275 | | /* |
276 | | * Create a dynamic hash table. Both the hash table structure and the table |
277 | | * itself are dynamically allocated. Furthermore, the table is extendible in |
278 | | * that it will automatically grow as its load factor increases beyond a |
279 | | * certain threshold. |
280 | | * Notes: |
281 | | * 1. If the number of bits in the hash_val_t type has not been computed yet, |
282 | | * we do so here, because this is likely to be the first function that the |
283 | | * user calls. |
284 | | * 2. Allocate a hash table control structure. |
285 | | * 3. If a hash table control structure is successfully allocated, we |
286 | | * proceed to initialize it. Otherwise we return a null pointer. |
287 | | * 4. We try to allocate the table of hash chains. |
288 | | * 5. If we were able to allocate the hash chain table, we can finish |
289 | | * initializing the hash structure and the table. Otherwise, we must |
290 | | * backtrack by freeing the hash structure. |
291 | | * 6. INIT_SIZE should be a power of two. The high and low marks are always set |
292 | | * to be twice the table size and half the table size respectively. When the |
293 | | * number of nodes in the table grows beyond the high size (beyond load |
294 | | * factor 2), it will double in size to cut the load factor down to about |
295 | | * about 1. If the table shrinks down to or beneath load factor 0.5, |
296 | | * it will shrink, bringing the load up to about 1. However, the table |
297 | | * will never shrink beneath INIT_SIZE even if it's emptied. |
298 | | * 7. This indicates that the table is dynamically allocated and dynamically |
299 | | * resized on the fly. A table that has this value set to zero is |
300 | | * assumed to be statically allocated and will not be resized. |
301 | | * 8. The table of chains must be properly reset to all null pointers. |
302 | | */ |
303 | | |
304 | | hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, |
305 | | hash_fun_t hashfun) |
306 | 0 | { |
307 | 0 | hash_t *hash; |
308 | |
|
309 | 0 | if (hash_val_t_bit == 0) /* 1 */ |
310 | 0 | compute_bits(); |
311 | |
|
312 | 0 | hash = malloc(sizeof *hash); /* 2 */ |
313 | |
|
314 | 0 | if (hash) { /* 3 */ |
315 | 0 | hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ |
316 | 0 | if (hash->table) { /* 5 */ |
317 | 0 | hash->nchains = INIT_SIZE; /* 6 */ |
318 | 0 | hash->highmark = INIT_SIZE * 2; |
319 | 0 | hash->lowmark = INIT_SIZE / 2; |
320 | 0 | hash->nodecount = 0; |
321 | 0 | hash->maxcount = maxcount; |
322 | 0 | hash->compare = compfun ? compfun : hash_comp_default; |
323 | 0 | hash->function = hashfun ? hashfun : hash_fun_default; |
324 | 0 | hash->allocnode = hnode_alloc; |
325 | 0 | hash->freenode = hnode_free; |
326 | 0 | hash->context = NULL; |
327 | 0 | hash->mask = INIT_MASK; |
328 | 0 | hash->dynamic = 1; /* 7 */ |
329 | 0 | clear_table(hash); /* 8 */ |
330 | 0 | expensive_assert (hash_verify(hash)); |
331 | 0 | return hash; |
332 | 0 | } |
333 | 0 | free(hash); |
334 | 0 | } |
335 | | |
336 | 0 | return NULL; |
337 | 0 | } |
338 | | |
339 | | /* |
340 | | * Select a different set of node allocator routines. |
341 | | */ |
342 | | |
343 | | void hash_set_allocator(hash_t *hash, hnode_alloc_t al, |
344 | | hnode_free_t fr, void *context) |
345 | 0 | { |
346 | 0 | assert (hash_count(hash) == 0); |
347 | | |
348 | 0 | hash->allocnode = al ? al : hnode_alloc; |
349 | 0 | hash->freenode = fr ? fr : hnode_free; |
350 | 0 | hash->context = context; |
351 | 0 | } |
352 | | |
353 | | /* |
354 | | * Free every node in the hash using the hash->freenode() function pointer, and |
355 | | * cause the hash to become empty. |
356 | | */ |
357 | | |
358 | | void hash_free_nodes(hash_t *hash) |
359 | 0 | { |
360 | 0 | hnode_t *node, *next; |
361 | 0 | hash_val_t chain; |
362 | |
|
363 | 0 | for (chain = 0; chain < hash->nchains; chain ++) { |
364 | 0 | node = hash->table[chain]; |
365 | 0 | while (node != NULL) { |
366 | 0 | next = node->next; |
367 | 0 | hash->freenode(node, hash->context); |
368 | 0 | node = next; |
369 | 0 | } |
370 | 0 | hash->table[chain] = NULL; |
371 | 0 | } |
372 | 0 | hash->nodecount = 0; |
373 | 0 | clear_table(hash); |
374 | 0 | } |
375 | | |
376 | | /* |
377 | | * Obsolescent function for removing all nodes from a table, |
378 | | * freeing them and then freeing the table all in one step. |
379 | | */ |
380 | | |
381 | | void hash_free(hash_t *hash) |
382 | 0 | { |
383 | | #ifdef KAZLIB_OBSOLESCENT_DEBUG |
384 | | assert ("call to obsolescent function hash_free()" && 0); |
385 | | #endif |
386 | 0 | hash_free_nodes(hash); |
387 | 0 | hash_destroy(hash); |
388 | 0 | } |
389 | | |
390 | | /* |
391 | | * Free a dynamic hash table structure. |
392 | | */ |
393 | | |
394 | | void hash_destroy(hash_t *hash) |
395 | 0 | { |
396 | 0 | assert (hash_val_t_bit != 0); |
397 | 0 | assert (hash_isempty(hash)); |
398 | 0 | free(hash->table); |
399 | 0 | free(hash); |
400 | 0 | } |
401 | | |
402 | | /* |
403 | | * Initialize a user supplied hash structure. The user also supplies a table of |
404 | | * chains which is assigned to the hash structure. The table is static---it |
405 | | * will not grow or shrink. |
406 | | * 1. See note 1. in hash_create(). |
407 | | * 2. The user supplied array of pointers hopefully contains nchains nodes. |
408 | | * 3. See note 7. in hash_create(). |
409 | | * 4. We must dynamically compute the mask from the given power of two table |
410 | | * size. |
411 | | * 5. The user supplied table can't be assumed to contain null pointers, |
412 | | * so we reset it here. |
413 | | */ |
414 | | |
415 | | hash_t *hash_init(hash_t *hash, hashcount_t maxcount, |
416 | | hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, |
417 | | hashcount_t nchains) |
418 | 0 | { |
419 | 0 | if (hash_val_t_bit == 0) /* 1 */ |
420 | 0 | compute_bits(); |
421 | |
|
422 | 0 | assert (is_power_of_two(nchains)); |
423 | | |
424 | 0 | hash->table = table; /* 2 */ |
425 | 0 | hash->nchains = nchains; |
426 | 0 | hash->nodecount = 0; |
427 | 0 | hash->maxcount = maxcount; |
428 | 0 | hash->compare = compfun ? compfun : hash_comp_default; |
429 | 0 | hash->function = hashfun ? hashfun : hash_fun_default; |
430 | 0 | hash->dynamic = 0; /* 3 */ |
431 | 0 | hash->mask = compute_mask(nchains); /* 4 */ |
432 | 0 | clear_table(hash); /* 5 */ |
433 | |
|
434 | 0 | expensive_assert (hash_verify(hash)); |
435 | 0 | return hash; |
436 | 0 | } |
437 | | |
438 | | /* |
439 | | * Reset the hash scanner so that the next element retrieved by |
440 | | * hash_scan_next() shall be the first element on the first non-empty chain. |
441 | | * Notes: |
442 | | * 1. Locate the first non empty chain. |
443 | | * 2. If an empty chain is found, remember which one it is and set the next |
444 | | * pointer to refer to its first element. |
445 | | * 3. Otherwise if a chain is not found, set the next pointer to NULL |
446 | | * so that hash_scan_next() shall indicate failure. |
447 | | */ |
448 | | |
449 | | void hash_scan_begin(hscan_t *scan, hash_t *hash) |
450 | 0 | { |
451 | 0 | hash_val_t nchains = hash->nchains; |
452 | 0 | hash_val_t chain; |
453 | |
|
454 | 0 | scan->table = hash; |
455 | | |
456 | | /* 1 */ |
457 | |
|
458 | 0 | for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) |
459 | 0 | ; |
460 | |
|
461 | 0 | if (chain < nchains) { /* 2 */ |
462 | 0 | scan->chain = chain; |
463 | 0 | scan->next = hash->table[chain]; |
464 | 0 | } else { /* 3 */ |
465 | 0 | scan->next = NULL; |
466 | 0 | } |
467 | 0 | } |
468 | | |
469 | | /* |
470 | | * Retrieve the next node from the hash table, and update the pointer |
471 | | * for the next invocation of hash_scan_next(). |
472 | | * Notes: |
473 | | * 1. Remember the next pointer in a temporary value so that it can be |
474 | | * returned. |
475 | | * 2. This assertion essentially checks whether the module has been properly |
476 | | * initialized. The first point of interaction with the module should be |
477 | | * either hash_create() or hash_init(), both of which set hash_val_t_bit to |
478 | | * a non zero value. |
479 | | * 3. If the next pointer we are returning is not NULL, then the user is |
480 | | * allowed to call hash_scan_next() again. We prepare the new next pointer |
481 | | * for that call right now. That way the user is allowed to delete the node |
482 | | * we are about to return, since we will no longer be needing it to locate |
483 | | * the next node. |
484 | | * 4. If there is a next node in the chain (next->next), then that becomes the |
485 | | * new next node, otherwise ... |
486 | | * 5. We have exhausted the current chain, and must locate the next subsequent |
487 | | * non-empty chain in the table. |
488 | | * 6. If a non-empty chain is found, the first element of that chain becomes |
489 | | * the new next node. Otherwise there is no new next node and we set the |
490 | | * pointer to NULL so that the next time hash_scan_next() is called, a null |
491 | | * pointer shall be immediately returned. |
492 | | */ |
493 | | |
494 | | |
495 | | hnode_t *hash_scan_next(hscan_t *scan) |
496 | 0 | { |
497 | 0 | hnode_t *next = scan->next; /* 1 */ |
498 | 0 | hash_t *hash = scan->table; |
499 | 0 | hash_val_t chain = scan->chain + 1; |
500 | 0 | hash_val_t nchains = hash->nchains; |
501 | |
|
502 | 0 | assert (hash_val_t_bit != 0); /* 2 */ |
503 | | |
504 | 0 | if (next) { /* 3 */ |
505 | 0 | if (next->next) { /* 4 */ |
506 | 0 | scan->next = next->next; |
507 | 0 | } else { |
508 | 0 | while (chain < nchains && hash->table[chain] == 0) /* 5 */ |
509 | 0 | chain++; |
510 | 0 | if (chain < nchains) { /* 6 */ |
511 | 0 | scan->chain = chain; |
512 | 0 | scan->next = hash->table[chain]; |
513 | 0 | } else { |
514 | 0 | scan->next = NULL; |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } |
518 | 0 | return next; |
519 | 0 | } |
520 | | |
521 | | /* |
522 | | * Insert a node into the hash table. |
523 | | * Notes: |
524 | | * 1. It's illegal to insert more than the maximum number of nodes. The client |
525 | | * should verify that the hash table is not full before attempting an |
526 | | * insertion. |
527 | | * 2. The same key may not be inserted into a table twice. |
528 | | * 3. If the table is dynamic and the load factor is already at >= 2, |
529 | | * grow the table. |
530 | | * 4. We take the bottom N bits of the hash value to derive the chain index, |
531 | | * where N is the base 2 logarithm of the size of the hash table. |
532 | | */ |
533 | | |
534 | | void hash_insert(hash_t *hash, hnode_t *node, const void *key) |
535 | 0 | { |
536 | 0 | hash_val_t hkey, chain; |
537 | |
|
538 | 0 | assert (hash_val_t_bit != 0); |
539 | 0 | assert (node->next == NULL); |
540 | 0 | assert (hash->nodecount < hash->maxcount); /* 1 */ |
541 | 0 | expensive_assert (hash_lookup(hash, key) == NULL); /* 2 */ |
542 | |
|
543 | 0 | if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */ |
544 | 0 | grow_table(hash); |
545 | |
|
546 | 0 | hkey = hash->function(key); |
547 | 0 | chain = hkey & hash->mask; /* 4 */ |
548 | |
|
549 | 0 | node->key = key; |
550 | 0 | node->hkey = hkey; |
551 | 0 | node->next = hash->table[chain]; |
552 | 0 | hash->table[chain] = node; |
553 | 0 | hash->nodecount++; |
554 | |
|
555 | 0 | expensive_assert (hash_verify(hash)); |
556 | 0 | } |
557 | | |
558 | | /* |
559 | | * Find a node in the hash table and return a pointer to it. |
560 | | * Notes: |
561 | | * 1. We hash the key and keep the entire hash value. As an optimization, when |
562 | | * we descend down the chain, we can compare hash values first and only if |
563 | | * hash values match do we perform a full key comparison. |
564 | | * 2. To locate the chain from among 2^N chains, we look at the lower N bits of |
565 | | * the hash value by anding them with the current mask. |
566 | | * 3. Looping through the chain, we compare the stored hash value inside each |
567 | | * node against our computed hash. If they match, then we do a full |
568 | | * comparison between the unhashed keys. If these match, we have located the |
569 | | * entry. |
570 | | */ |
571 | | |
572 | | hnode_t *hash_lookup(hash_t *hash, const void *key) |
573 | 0 | { |
574 | 0 | hash_val_t hkey, chain; |
575 | 0 | hnode_t *nptr; |
576 | |
|
577 | 0 | hkey = hash->function(key); /* 1 */ |
578 | 0 | chain = hkey & hash->mask; /* 2 */ |
579 | |
|
580 | 0 | for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */ |
581 | 0 | if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0) |
582 | 0 | return nptr; |
583 | 0 | } |
584 | | |
585 | 0 | return NULL; |
586 | 0 | } |
587 | | |
588 | | /* |
589 | | * Delete the given node from the hash table. Since the chains |
590 | | * are singly linked, we must locate the start of the node's chain |
591 | | * and traverse. |
592 | | * Notes: |
593 | | * 1. The node must belong to this hash table, and its key must not have |
594 | | * been tampered with. |
595 | | * 2. If this deletion will take the node count below the low mark, we |
596 | | * shrink the table now. |
597 | | * 3. Determine which chain the node belongs to, and fetch the pointer |
598 | | * to the first node in this chain. |
599 | | * 4. If the node being deleted is the first node in the chain, then |
600 | | * simply update the chain head pointer. |
601 | | * 5. Otherwise advance to the node's predecessor, and splice out |
602 | | * by updating the predecessor's next pointer. |
603 | | * 6. Indicate that the node is no longer in a hash table. |
604 | | */ |
605 | | |
606 | | hnode_t *hash_delete(hash_t *hash, hnode_t *node) |
607 | 0 | { |
608 | 0 | hash_val_t chain; |
609 | 0 | hnode_t *hptr; |
610 | |
|
611 | 0 | expensive_assert (hash_lookup(hash, node->key) == node); /* 1 */ |
612 | 0 | assert (hash_val_t_bit != 0); |
613 | | |
614 | 0 | if (hash->dynamic && hash->nodecount <= hash->lowmark |
615 | 0 | && hash->nodecount > INIT_SIZE) |
616 | 0 | shrink_table(hash); /* 2 */ |
617 | |
|
618 | 0 | chain = node->hkey & hash->mask; /* 3 */ |
619 | 0 | hptr = hash->table[chain]; |
620 | |
|
621 | 0 | if (hptr == node) { /* 4 */ |
622 | 0 | hash->table[chain] = node->next; |
623 | 0 | } else { |
624 | 0 | while (hptr->next != node) { /* 5 */ |
625 | 0 | assert (hptr != 0); |
626 | 0 | hptr = hptr->next; |
627 | 0 | } |
628 | 0 | assert (hptr->next == node); |
629 | 0 | hptr->next = node->next; |
630 | 0 | } |
631 | | |
632 | 0 | hash->nodecount--; |
633 | 0 | expensive_assert (hash_verify(hash)); |
634 | |
|
635 | 0 | node->next = NULL; /* 6 */ |
636 | 0 | return node; |
637 | 0 | } |
638 | | |
639 | | int hash_alloc_insert(hash_t *hash, const void *key, void *data) |
640 | 0 | { |
641 | 0 | hnode_t *node = hash->allocnode(hash->context); |
642 | |
|
643 | 0 | if (node) { |
644 | 0 | hnode_init(node, data); |
645 | 0 | hash_insert(hash, node, key); |
646 | 0 | return 0; |
647 | 0 | } |
648 | 0 | return -1; |
649 | 0 | } |
650 | | |
651 | | void hash_delete_free(hash_t *hash, hnode_t *node) |
652 | 0 | { |
653 | 0 | hash_delete(hash, node); |
654 | 0 | hash->freenode(node, hash->context); |
655 | 0 | } |
656 | | |
657 | | /* |
658 | | * Exactly like hash_delete, except does not trigger table shrinkage. This is to be |
659 | | * used from within a hash table scan operation. See notes for hash_delete. |
660 | | */ |
661 | | |
662 | | hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node) |
663 | 0 | { |
664 | 0 | hash_val_t chain; |
665 | 0 | hnode_t *hptr; |
666 | |
|
667 | 0 | expensive_assert (hash_lookup(hash, node->key) == node); |
668 | 0 | assert (hash_val_t_bit != 0); |
669 | | |
670 | 0 | chain = node->hkey & hash->mask; |
671 | 0 | hptr = hash->table[chain]; |
672 | |
|
673 | 0 | if (hptr == node) { |
674 | 0 | hash->table[chain] = node->next; |
675 | 0 | } else { |
676 | 0 | while (hptr->next != node) |
677 | 0 | hptr = hptr->next; |
678 | 0 | hptr->next = node->next; |
679 | 0 | } |
680 | |
|
681 | 0 | hash->nodecount--; |
682 | 0 | expensive_assert (hash_verify(hash)); |
683 | 0 | node->next = NULL; |
684 | |
|
685 | 0 | return node; |
686 | 0 | } |
687 | | |
688 | | /* |
689 | | * Like hash_delete_free but based on hash_scan_delete. |
690 | | */ |
691 | | |
692 | | void hash_scan_delfree(hash_t *hash, hnode_t *node) |
693 | 0 | { |
694 | 0 | hash_scan_delete(hash, node); |
695 | 0 | hash->freenode(node, hash->context); |
696 | 0 | } |
697 | | |
698 | | /* |
699 | | * Verify whether the given object is a valid hash table. This means |
700 | | * Notes: |
701 | | * 1. If the hash table is dynamic, verify whether the high and |
702 | | * low expansion/shrinkage thresholds are powers of two. |
703 | | * 2. Count all nodes in the table, and test each hash value |
704 | | * to see whether it is correct for the node's chain. |
705 | | */ |
706 | | |
707 | | int hash_verify(hash_t *hash) |
708 | 0 | { |
709 | 0 | hashcount_t count = 0; |
710 | 0 | hash_val_t chain; |
711 | 0 | hnode_t *hptr; |
712 | |
|
713 | 0 | if (hash->dynamic) { /* 1 */ |
714 | 0 | if (hash->lowmark >= hash->highmark) |
715 | 0 | return 0; |
716 | 0 | if (!is_power_of_two(hash->highmark)) |
717 | 0 | return 0; |
718 | 0 | if (!is_power_of_two(hash->lowmark)) |
719 | 0 | return 0; |
720 | 0 | } |
721 | | |
722 | 0 | for (chain = 0; chain < hash->nchains; chain++) { /* 2 */ |
723 | 0 | for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) { |
724 | 0 | if ((hptr->hkey & hash->mask) != chain) |
725 | 0 | return 0; |
726 | 0 | count++; |
727 | 0 | } |
728 | 0 | } |
729 | | |
730 | 0 | if (count != hash->nodecount) |
731 | 0 | return 0; |
732 | | |
733 | 0 | return 1; |
734 | 0 | } |
735 | | |
736 | | /* |
737 | | * Test whether the hash table is full and return 1 if this is true, |
738 | | * 0 if it is false. |
739 | | */ |
740 | | |
741 | | #undef hash_isfull |
742 | | int hash_isfull(hash_t *hash) |
743 | 0 | { |
744 | 0 | return hash->nodecount == hash->maxcount; |
745 | 0 | } |
746 | | |
747 | | /* |
748 | | * Test whether the hash table is empty and return 1 if this is true, |
749 | | * 0 if it is false. |
750 | | */ |
751 | | |
752 | | #undef hash_isempty |
753 | | int hash_isempty(hash_t *hash) |
754 | 0 | { |
755 | 0 | return hash->nodecount == 0; |
756 | 0 | } |
757 | | |
758 | | static hnode_t *hnode_alloc(ATTRIBUTE_UNUSED void *context) |
759 | 0 | { |
760 | 0 | return malloc(sizeof *hnode_alloc(NULL)); |
761 | 0 | } |
762 | | |
763 | | static void hnode_free(hnode_t *node, ATTRIBUTE_UNUSED void *context) |
764 | 0 | { |
765 | 0 | free(node); |
766 | 0 | } |
767 | | |
768 | | |
769 | | /* |
770 | | * Create a hash table node dynamically and assign it the given data. |
771 | | */ |
772 | | |
773 | | hnode_t *hnode_create(void *data) |
774 | 0 | { |
775 | 0 | hnode_t *node = malloc(sizeof *node); |
776 | 0 | if (node) { |
777 | 0 | node->data = data; |
778 | 0 | node->next = NULL; |
779 | 0 | } |
780 | 0 | return node; |
781 | 0 | } |
782 | | |
783 | | /* |
784 | | * Initialize a client-supplied node |
785 | | */ |
786 | | |
787 | | hnode_t *hnode_init(hnode_t *hnode, void *data) |
788 | 0 | { |
789 | 0 | hnode->data = data; |
790 | 0 | hnode->next = NULL; |
791 | 0 | return hnode; |
792 | 0 | } |
793 | | |
794 | | /* |
795 | | * Destroy a dynamically allocated node. |
796 | | */ |
797 | | |
798 | | void hnode_destroy(hnode_t *hnode) |
799 | 0 | { |
800 | 0 | free(hnode); |
801 | 0 | } |
802 | | |
803 | | #undef hnode_put |
804 | | void hnode_put(hnode_t *node, void *data) |
805 | 0 | { |
806 | 0 | node->data = data; |
807 | 0 | } |
808 | | |
809 | | #undef hnode_get |
810 | | void *hnode_get(hnode_t *node) |
811 | 0 | { |
812 | 0 | return node->data; |
813 | 0 | } |
814 | | |
815 | | #undef hnode_getkey |
816 | | const void *hnode_getkey(hnode_t *node) |
817 | 0 | { |
818 | 0 | return node->key; |
819 | 0 | } |
820 | | |
821 | | #undef hash_count |
822 | | hashcount_t hash_count(hash_t *hash) |
823 | 0 | { |
824 | 0 | return hash->nodecount; |
825 | 0 | } |
826 | | |
827 | | #undef hash_size |
828 | | hashcount_t hash_size(hash_t *hash) |
829 | 0 | { |
830 | 0 | return hash->nchains; |
831 | 0 | } |
832 | | |
833 | | static hash_val_t hash_fun_default(const void *key) |
834 | 0 | { |
835 | 0 | static unsigned long randbox[] = { |
836 | 0 | 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, |
837 | 0 | 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, |
838 | 0 | 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, |
839 | 0 | 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, |
840 | 0 | }; |
841 | |
|
842 | 0 | const unsigned char *str = key; |
843 | 0 | hash_val_t acc = 0; |
844 | |
|
845 | 0 | while (*str) { |
846 | 0 | acc ^= randbox[(*str + acc) & 0xf]; |
847 | 0 | acc = (acc << 1) | (acc >> 31); |
848 | 0 | acc &= 0xffffffffU; |
849 | 0 | acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; |
850 | 0 | acc = (acc << 2) | (acc >> 30); |
851 | 0 | acc &= 0xffffffffU; |
852 | 0 | } |
853 | 0 | return acc; |
854 | 0 | } |
855 | | |
856 | | static int hash_comp_default(const void *key1, const void *key2) |
857 | 0 | { |
858 | 0 | return strcmp(key1, key2); |
859 | 0 | } |
860 | | |
861 | | #ifdef KAZLIB_TEST_MAIN |
862 | | |
863 | | #include <stdio.h> |
864 | | #include <ctype.h> |
865 | | #include <stdarg.h> |
866 | | |
867 | | typedef char input_t[256]; |
868 | | |
869 | | static int tokenize(char *string, ...) |
870 | | { |
871 | | char **tokptr; |
872 | | va_list arglist; |
873 | | int tokcount = 0; |
874 | | |
875 | | va_start(arglist, string); |
876 | | tokptr = va_arg(arglist, char **); |
877 | | while (tokptr) { |
878 | | while (*string && isspace((unsigned char) *string)) |
879 | | string++; |
880 | | if (!*string) |
881 | | break; |
882 | | *tokptr = string; |
883 | | while (*string && !isspace((unsigned char) *string)) |
884 | | string++; |
885 | | tokptr = va_arg(arglist, char **); |
886 | | tokcount++; |
887 | | if (!*string) |
888 | | break; |
889 | | *string++ = 0; |
890 | | } |
891 | | va_end(arglist); |
892 | | |
893 | | return tokcount; |
894 | | } |
895 | | |
896 | | static char *dupstring(char *str) |
897 | | { |
898 | | int sz = strlen(str) + 1; |
899 | | char *new = malloc(sz); |
900 | | if (new) |
901 | | memcpy(new, str, sz); |
902 | | return new; |
903 | | } |
904 | | |
905 | | static hnode_t *new_node(void *c) |
906 | | { |
907 | | static hnode_t few[5]; |
908 | | static int count; |
909 | | |
910 | | if (count < 5) |
911 | | return few + count++; |
912 | | |
913 | | return NULL; |
914 | | } |
915 | | |
916 | | static void del_node(hnode_t *n, void *c) |
917 | | { |
918 | | } |
919 | | |
920 | | int main(void) |
921 | | { |
922 | | input_t in; |
923 | | hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0); |
924 | | hnode_t *hn; |
925 | | hscan_t hs; |
926 | | char *tok1, *tok2, *val; |
927 | | const char *key; |
928 | | int prompt = 0; |
929 | | |
930 | | char *help = |
931 | | "a <key> <val> add value to hash table\n" |
932 | | "d <key> delete value from hash table\n" |
933 | | "l <key> lookup value in hash table\n" |
934 | | "n show size of hash table\n" |
935 | | "c show number of entries\n" |
936 | | "t dump whole hash table\n" |
937 | | "+ increase hash table (private func)\n" |
938 | | "- decrease hash table (private func)\n" |
939 | | "b print hash_t_bit value\n" |
940 | | "p turn prompt on\n" |
941 | | "s switch to non-functioning allocator\n" |
942 | | "q quit"; |
943 | | |
944 | | if (!h) |
945 | | puts("hash_create failed"); |
946 | | |
947 | | for (;;) { |
948 | | if (prompt) |
949 | | putchar('>'); |
950 | | fflush(stdout); |
951 | | |
952 | | if (!fgets(in, sizeof(input_t), stdin)) |
953 | | break; |
954 | | |
955 | | switch(in[0]) { |
956 | | case '?': |
957 | | puts(help); |
958 | | break; |
959 | | case 'b': |
960 | | printf("%d\n", hash_val_t_bit); |
961 | | break; |
962 | | case 'a': |
963 | | if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { |
964 | | puts("what?"); |
965 | | break; |
966 | | } |
967 | | key = dupstring(tok1); |
968 | | val = dupstring(tok2); |
969 | | |
970 | | if (!key || !val) { |
971 | | puts("out of memory"); |
972 | | free((void *) key); |
973 | | free(val); |
974 | | break; |
975 | | } |
976 | | |
977 | | if (hash_alloc_insert(h, key, val) < 0) { |
978 | | puts("hash_alloc_insert failed"); |
979 | | free((void *) key); |
980 | | free(val); |
981 | | break; |
982 | | } |
983 | | break; |
984 | | case 'd': |
985 | | if (tokenize(in+1, &tok1, (char **) 0) != 1) { |
986 | | puts("what?"); |
987 | | break; |
988 | | } |
989 | | hn = hash_lookup(h, tok1); |
990 | | if (!hn) { |
991 | | puts("hash_lookup failed"); |
992 | | break; |
993 | | } |
994 | | val = hnode_get(hn); |
995 | | key = hnode_getkey(hn); |
996 | | hash_scan_delfree(h, hn); |
997 | | free((void *) key); |
998 | | free(val); |
999 | | break; |
1000 | | case 'l': |
1001 | | if (tokenize(in+1, &tok1, (char **) 0) != 1) { |
1002 | | puts("what?"); |
1003 | | break; |
1004 | | } |
1005 | | hn = hash_lookup(h, tok1); |
1006 | | if (!hn) { |
1007 | | puts("hash_lookup failed"); |
1008 | | break; |
1009 | | } |
1010 | | val = hnode_get(hn); |
1011 | | puts(val); |
1012 | | break; |
1013 | | case 'n': |
1014 | | printf("%lu\n", (unsigned long) hash_size(h)); |
1015 | | break; |
1016 | | case 'c': |
1017 | | printf("%lu\n", (unsigned long) hash_count(h)); |
1018 | | break; |
1019 | | case 't': |
1020 | | hash_scan_begin(&hs, h); |
1021 | | while ((hn = hash_scan_next(&hs))) |
1022 | | printf("%s\t%s\n", (char*) hnode_getkey(hn), |
1023 | | (char*) hnode_get(hn)); |
1024 | | break; |
1025 | | case '+': |
1026 | | grow_table(h); /* private function */ |
1027 | | break; |
1028 | | case '-': |
1029 | | shrink_table(h); /* private function */ |
1030 | | break; |
1031 | | case 'q': |
1032 | | exit(0); |
1033 | | break; |
1034 | | case '\0': |
1035 | | break; |
1036 | | case 'p': |
1037 | | prompt = 1; |
1038 | | break; |
1039 | | case 's': |
1040 | | hash_set_allocator(h, new_node, del_node, NULL); |
1041 | | break; |
1042 | | default: |
1043 | | putchar('?'); |
1044 | | putchar('\n'); |
1045 | | break; |
1046 | | } |
1047 | | } |
1048 | | |
1049 | | return 0; |
1050 | | } |
1051 | | |
1052 | | #endif |