/src/Python-3.8.3/Modules/hashtable.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* The implementation of the hash table (_Py_hashtable_t) is based on the |
2 | | cfuhash project: |
3 | | http://sourceforge.net/projects/libcfu/ |
4 | | |
5 | | Copyright of cfuhash: |
6 | | ---------------------------------- |
7 | | Creation date: 2005-06-24 21:22:40 |
8 | | Authors: Don |
9 | | Change log: |
10 | | |
11 | | Copyright (c) 2005 Don Owens |
12 | | All rights reserved. |
13 | | |
14 | | This code is released under the BSD license: |
15 | | |
16 | | Redistribution and use in source and binary forms, with or without |
17 | | modification, are permitted provided that the following conditions |
18 | | are met: |
19 | | |
20 | | * Redistributions of source code must retain the above copyright |
21 | | notice, this list of conditions and the following disclaimer. |
22 | | |
23 | | * Redistributions in binary form must reproduce the above |
24 | | copyright notice, this list of conditions and the following |
25 | | disclaimer in the documentation and/or other materials provided |
26 | | with the distribution. |
27 | | |
28 | | * Neither the name of the author nor the names of its |
29 | | contributors may be used to endorse or promote products derived |
30 | | from this software without specific prior written permission. |
31 | | |
32 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
33 | | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
34 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
35 | | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 | | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 | | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
38 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
39 | | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
40 | | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
41 | | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
42 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
43 | | OF THE POSSIBILITY OF SUCH DAMAGE. |
44 | | ---------------------------------- |
45 | | */ |
46 | | |
47 | | #include "Python.h" |
48 | | #include "hashtable.h" |
49 | | |
50 | 0 | #define HASHTABLE_MIN_SIZE 16 |
51 | 0 | #define HASHTABLE_HIGH 0.50 |
52 | 0 | #define HASHTABLE_LOW 0.10 |
53 | 0 | #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) |
54 | | |
55 | | #define BUCKETS_HEAD(SLIST) \ |
56 | 0 | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) |
57 | | #define TABLE_HEAD(HT, BUCKET) \ |
58 | 0 | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) |
59 | | #define ENTRY_NEXT(ENTRY) \ |
60 | 0 | ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) |
61 | | #define HASHTABLE_ITEM_SIZE(HT) \ |
62 | 0 | (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) |
63 | | |
64 | | #define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ |
65 | 0 | do { \ |
66 | 0 | assert((DATA_SIZE) == (TABLE)->data_size); \ |
67 | 0 | memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \ |
68 | 0 | (DATA_SIZE)); \ |
69 | 0 | } while (0) |
70 | | |
71 | | #define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \ |
72 | 0 | do { \ |
73 | 0 | assert((DATA_SIZE) == (TABLE)->data_size); \ |
74 | 0 | memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \ |
75 | 0 | (PDATA), (DATA_SIZE)); \ |
76 | 0 | } while (0) |
77 | | |
78 | | /* Forward declaration */ |
79 | | static void hashtable_rehash(_Py_hashtable_t *ht); |
80 | | |
81 | | static void |
82 | | _Py_slist_init(_Py_slist_t *list) |
83 | 0 | { |
84 | 0 | list->head = NULL; |
85 | 0 | } |
86 | | |
87 | | |
88 | | static void |
89 | | _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) |
90 | 0 | { |
91 | 0 | item->next = list->head; |
92 | 0 | list->head = item; |
93 | 0 | } |
94 | | |
95 | | |
96 | | static void |
97 | | _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, |
98 | | _Py_slist_item_t *item) |
99 | 0 | { |
100 | 0 | if (previous != NULL) |
101 | 0 | previous->next = item->next; |
102 | 0 | else |
103 | 0 | list->head = item->next; |
104 | 0 | } |
105 | | |
106 | | |
107 | | Py_uhash_t |
108 | | _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey) |
109 | 0 | { |
110 | 0 | void *key; |
111 | |
|
112 | 0 | _Py_HASHTABLE_READ_KEY(ht, pkey, key); |
113 | 0 | return (Py_uhash_t)_Py_HashPointer(key); |
114 | 0 | } |
115 | | |
116 | | |
117 | | int |
118 | | _Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey, |
119 | | const _Py_hashtable_entry_t *entry) |
120 | 0 | { |
121 | 0 | const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry); |
122 | 0 | return (memcmp(pkey, pkey2, ht->key_size) == 0); |
123 | 0 | } |
124 | | |
125 | | |
126 | | /* makes sure the real size of the buckets array is a power of 2 */ |
127 | | static size_t |
128 | | round_size(size_t s) |
129 | 0 | { |
130 | 0 | size_t i; |
131 | 0 | if (s < HASHTABLE_MIN_SIZE) |
132 | 0 | return HASHTABLE_MIN_SIZE; |
133 | 0 | i = 1; |
134 | 0 | while (i < s) |
135 | 0 | i <<= 1; |
136 | 0 | return i; |
137 | 0 | } |
138 | | |
139 | | |
140 | | _Py_hashtable_t * |
141 | | _Py_hashtable_new_full(size_t key_size, size_t data_size, |
142 | | size_t init_size, |
143 | | _Py_hashtable_hash_func hash_func, |
144 | | _Py_hashtable_compare_func compare_func, |
145 | | _Py_hashtable_allocator_t *allocator) |
146 | 0 | { |
147 | 0 | _Py_hashtable_t *ht; |
148 | 0 | size_t buckets_size; |
149 | 0 | _Py_hashtable_allocator_t alloc; |
150 | |
|
151 | 0 | if (allocator == NULL) { |
152 | 0 | alloc.malloc = PyMem_RawMalloc; |
153 | 0 | alloc.free = PyMem_RawFree; |
154 | 0 | } |
155 | 0 | else |
156 | 0 | alloc = *allocator; |
157 | |
|
158 | 0 | ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); |
159 | 0 | if (ht == NULL) |
160 | 0 | return ht; |
161 | | |
162 | 0 | ht->num_buckets = round_size(init_size); |
163 | 0 | ht->entries = 0; |
164 | 0 | ht->key_size = key_size; |
165 | 0 | ht->data_size = data_size; |
166 | |
|
167 | 0 | buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); |
168 | 0 | ht->buckets = alloc.malloc(buckets_size); |
169 | 0 | if (ht->buckets == NULL) { |
170 | 0 | alloc.free(ht); |
171 | 0 | return NULL; |
172 | 0 | } |
173 | 0 | memset(ht->buckets, 0, buckets_size); |
174 | |
|
175 | 0 | ht->hash_func = hash_func; |
176 | 0 | ht->compare_func = compare_func; |
177 | 0 | ht->alloc = alloc; |
178 | 0 | return ht; |
179 | 0 | } |
180 | | |
181 | | |
182 | | _Py_hashtable_t * |
183 | | _Py_hashtable_new(size_t key_size, size_t data_size, |
184 | | _Py_hashtable_hash_func hash_func, |
185 | | _Py_hashtable_compare_func compare_func) |
186 | 0 | { |
187 | 0 | return _Py_hashtable_new_full(key_size, data_size, |
188 | 0 | HASHTABLE_MIN_SIZE, |
189 | 0 | hash_func, compare_func, |
190 | 0 | NULL); |
191 | 0 | } |
192 | | |
193 | | |
194 | | size_t |
195 | | _Py_hashtable_size(_Py_hashtable_t *ht) |
196 | 0 | { |
197 | 0 | size_t size; |
198 | |
|
199 | 0 | size = sizeof(_Py_hashtable_t); |
200 | | |
201 | | /* buckets */ |
202 | 0 | size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *); |
203 | | |
204 | | /* entries */ |
205 | 0 | size += ht->entries * HASHTABLE_ITEM_SIZE(ht); |
206 | |
|
207 | 0 | return size; |
208 | 0 | } |
209 | | |
210 | | |
211 | | #ifdef Py_DEBUG |
212 | | void |
213 | | _Py_hashtable_print_stats(_Py_hashtable_t *ht) |
214 | | { |
215 | | size_t size; |
216 | | size_t chain_len, max_chain_len, total_chain_len, nchains; |
217 | | _Py_hashtable_entry_t *entry; |
218 | | size_t hv; |
219 | | double load; |
220 | | |
221 | | size = _Py_hashtable_size(ht); |
222 | | |
223 | | load = (double)ht->entries / ht->num_buckets; |
224 | | |
225 | | max_chain_len = 0; |
226 | | total_chain_len = 0; |
227 | | nchains = 0; |
228 | | for (hv = 0; hv < ht->num_buckets; hv++) { |
229 | | entry = TABLE_HEAD(ht, hv); |
230 | | if (entry != NULL) { |
231 | | chain_len = 0; |
232 | | for (; entry; entry = ENTRY_NEXT(entry)) { |
233 | | chain_len++; |
234 | | } |
235 | | if (chain_len > max_chain_len) |
236 | | max_chain_len = chain_len; |
237 | | total_chain_len += chain_len; |
238 | | nchains++; |
239 | | } |
240 | | } |
241 | | printf("hash table %p: entries=%" |
242 | | PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", |
243 | | (void *)ht, ht->entries, ht->num_buckets, load * 100.0); |
244 | | if (nchains) |
245 | | printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); |
246 | | printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n", |
247 | | max_chain_len, size / 1024); |
248 | | } |
249 | | #endif |
250 | | |
251 | | |
252 | | _Py_hashtable_entry_t * |
253 | | _Py_hashtable_get_entry(_Py_hashtable_t *ht, |
254 | | size_t key_size, const void *pkey) |
255 | 0 | { |
256 | 0 | Py_uhash_t key_hash; |
257 | 0 | size_t index; |
258 | 0 | _Py_hashtable_entry_t *entry; |
259 | |
|
260 | 0 | assert(key_size == ht->key_size); |
261 | |
|
262 | 0 | key_hash = ht->hash_func(ht, pkey); |
263 | 0 | index = key_hash & (ht->num_buckets - 1); |
264 | |
|
265 | 0 | for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { |
266 | 0 | if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) |
267 | 0 | break; |
268 | 0 | } |
269 | |
|
270 | 0 | return entry; |
271 | 0 | } |
272 | | |
273 | | |
274 | | static int |
275 | | _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, |
276 | | void *data, size_t data_size) |
277 | 0 | { |
278 | 0 | Py_uhash_t key_hash; |
279 | 0 | size_t index; |
280 | 0 | _Py_hashtable_entry_t *entry, *previous; |
281 | |
|
282 | 0 | assert(key_size == ht->key_size); |
283 | |
|
284 | 0 | key_hash = ht->hash_func(ht, pkey); |
285 | 0 | index = key_hash & (ht->num_buckets - 1); |
286 | |
|
287 | 0 | previous = NULL; |
288 | 0 | for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { |
289 | 0 | if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) |
290 | 0 | break; |
291 | 0 | previous = entry; |
292 | 0 | } |
293 | |
|
294 | 0 | if (entry == NULL) |
295 | 0 | return 0; |
296 | | |
297 | 0 | _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, |
298 | 0 | (_Py_slist_item_t *)entry); |
299 | 0 | ht->entries--; |
300 | |
|
301 | 0 | if (data != NULL) |
302 | 0 | ENTRY_READ_PDATA(ht, entry, data_size, data); |
303 | 0 | ht->alloc.free(entry); |
304 | |
|
305 | 0 | if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) |
306 | 0 | hashtable_rehash(ht); |
307 | 0 | return 1; |
308 | 0 | } |
309 | | |
310 | | |
311 | | int |
312 | | _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, |
313 | | size_t data_size, const void *data) |
314 | 0 | { |
315 | 0 | Py_uhash_t key_hash; |
316 | 0 | size_t index; |
317 | 0 | _Py_hashtable_entry_t *entry; |
318 | |
|
319 | 0 | assert(key_size == ht->key_size); |
320 | |
|
321 | 0 | assert(data != NULL || data_size == 0); |
322 | | #ifndef NDEBUG |
323 | | /* Don't write the assertion on a single line because it is interesting |
324 | | to know the duplicated entry if the assertion failed. The entry can |
325 | | be read using a debugger. */ |
326 | | entry = _Py_hashtable_get_entry(ht, key_size, pkey); |
327 | | assert(entry == NULL); |
328 | | #endif |
329 | |
|
330 | 0 | key_hash = ht->hash_func(ht, pkey); |
331 | 0 | index = key_hash & (ht->num_buckets - 1); |
332 | |
|
333 | 0 | entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); |
334 | 0 | if (entry == NULL) { |
335 | | /* memory allocation failed */ |
336 | 0 | return -1; |
337 | 0 | } |
338 | | |
339 | 0 | entry->key_hash = key_hash; |
340 | 0 | memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size); |
341 | 0 | if (data) |
342 | 0 | ENTRY_WRITE_PDATA(ht, entry, data_size, data); |
343 | |
|
344 | 0 | _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); |
345 | 0 | ht->entries++; |
346 | |
|
347 | 0 | if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) |
348 | 0 | hashtable_rehash(ht); |
349 | 0 | return 0; |
350 | 0 | } |
351 | | |
352 | | |
353 | | int |
354 | | _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, |
355 | | size_t data_size, void *data) |
356 | 0 | { |
357 | 0 | _Py_hashtable_entry_t *entry; |
358 | |
|
359 | 0 | assert(data != NULL); |
360 | |
|
361 | 0 | entry = _Py_hashtable_get_entry(ht, key_size, pkey); |
362 | 0 | if (entry == NULL) |
363 | 0 | return 0; |
364 | 0 | ENTRY_READ_PDATA(ht, entry, data_size, data); |
365 | 0 | return 1; |
366 | 0 | } |
367 | | |
368 | | |
369 | | int |
370 | | _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, |
371 | | size_t data_size, void *data) |
372 | 0 | { |
373 | 0 | assert(data != NULL); |
374 | 0 | return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size); |
375 | 0 | } |
376 | | |
377 | | |
378 | | /* Code commented since the function is not needed in Python */ |
379 | | #if 0 |
380 | | void |
381 | | _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) |
382 | | { |
383 | | #ifndef NDEBUG |
384 | | int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); |
385 | | assert(found); |
386 | | #else |
387 | | (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); |
388 | | #endif |
389 | | } |
390 | | #endif |
391 | | |
392 | | |
393 | | int |
394 | | _Py_hashtable_foreach(_Py_hashtable_t *ht, |
395 | | _Py_hashtable_foreach_func func, |
396 | | void *arg) |
397 | 0 | { |
398 | 0 | _Py_hashtable_entry_t *entry; |
399 | 0 | size_t hv; |
400 | |
|
401 | 0 | for (hv = 0; hv < ht->num_buckets; hv++) { |
402 | 0 | for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { |
403 | 0 | int res = func(ht, entry, arg); |
404 | 0 | if (res) |
405 | 0 | return res; |
406 | 0 | } |
407 | 0 | } |
408 | 0 | return 0; |
409 | 0 | } |
410 | | |
411 | | |
412 | | static void |
413 | | hashtable_rehash(_Py_hashtable_t *ht) |
414 | 0 | { |
415 | 0 | size_t buckets_size, new_size, bucket; |
416 | 0 | _Py_slist_t *old_buckets = NULL; |
417 | 0 | size_t old_num_buckets; |
418 | |
|
419 | 0 | new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); |
420 | 0 | if (new_size == ht->num_buckets) |
421 | 0 | return; |
422 | | |
423 | 0 | old_num_buckets = ht->num_buckets; |
424 | |
|
425 | 0 | buckets_size = new_size * sizeof(ht->buckets[0]); |
426 | 0 | old_buckets = ht->buckets; |
427 | 0 | ht->buckets = ht->alloc.malloc(buckets_size); |
428 | 0 | if (ht->buckets == NULL) { |
429 | | /* cancel rehash on memory allocation failure */ |
430 | 0 | ht->buckets = old_buckets ; |
431 | | /* memory allocation failed */ |
432 | 0 | return; |
433 | 0 | } |
434 | 0 | memset(ht->buckets, 0, buckets_size); |
435 | |
|
436 | 0 | ht->num_buckets = new_size; |
437 | |
|
438 | 0 | for (bucket = 0; bucket < old_num_buckets; bucket++) { |
439 | 0 | _Py_hashtable_entry_t *entry, *next; |
440 | 0 | for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { |
441 | 0 | size_t entry_index; |
442 | | |
443 | |
|
444 | 0 | assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash); |
445 | 0 | next = ENTRY_NEXT(entry); |
446 | 0 | entry_index = entry->key_hash & (new_size - 1); |
447 | |
|
448 | 0 | _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); |
449 | 0 | } |
450 | 0 | } |
451 | |
|
452 | 0 | ht->alloc.free(old_buckets); |
453 | 0 | } |
454 | | |
455 | | |
456 | | void |
457 | | _Py_hashtable_clear(_Py_hashtable_t *ht) |
458 | 0 | { |
459 | 0 | _Py_hashtable_entry_t *entry, *next; |
460 | 0 | size_t i; |
461 | |
|
462 | 0 | for (i=0; i < ht->num_buckets; i++) { |
463 | 0 | for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { |
464 | 0 | next = ENTRY_NEXT(entry); |
465 | 0 | ht->alloc.free(entry); |
466 | 0 | } |
467 | 0 | _Py_slist_init(&ht->buckets[i]); |
468 | 0 | } |
469 | 0 | ht->entries = 0; |
470 | 0 | hashtable_rehash(ht); |
471 | 0 | } |
472 | | |
473 | | |
474 | | void |
475 | | _Py_hashtable_destroy(_Py_hashtable_t *ht) |
476 | 0 | { |
477 | 0 | size_t i; |
478 | |
|
479 | 0 | for (i = 0; i < ht->num_buckets; i++) { |
480 | 0 | _Py_slist_item_t *entry = ht->buckets[i].head; |
481 | 0 | while (entry) { |
482 | 0 | _Py_slist_item_t *entry_next = entry->next; |
483 | 0 | ht->alloc.free(entry); |
484 | 0 | entry = entry_next; |
485 | 0 | } |
486 | 0 | } |
487 | |
|
488 | 0 | ht->alloc.free(ht->buckets); |
489 | 0 | ht->alloc.free(ht); |
490 | 0 | } |
491 | | |
492 | | |
493 | | _Py_hashtable_t * |
494 | | _Py_hashtable_copy(_Py_hashtable_t *src) |
495 | 0 | { |
496 | 0 | const size_t key_size = src->key_size; |
497 | 0 | const size_t data_size = src->data_size; |
498 | 0 | _Py_hashtable_t *dst; |
499 | 0 | _Py_hashtable_entry_t *entry; |
500 | 0 | size_t bucket; |
501 | 0 | int err; |
502 | |
|
503 | 0 | dst = _Py_hashtable_new_full(key_size, data_size, |
504 | 0 | src->num_buckets, |
505 | 0 | src->hash_func, |
506 | 0 | src->compare_func, |
507 | 0 | &src->alloc); |
508 | 0 | if (dst == NULL) |
509 | 0 | return NULL; |
510 | | |
511 | 0 | for (bucket=0; bucket < src->num_buckets; bucket++) { |
512 | 0 | entry = TABLE_HEAD(src, bucket); |
513 | 0 | for (; entry; entry = ENTRY_NEXT(entry)) { |
514 | 0 | const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry); |
515 | 0 | const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry); |
516 | 0 | err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata); |
517 | 0 | if (err) { |
518 | 0 | _Py_hashtable_destroy(dst); |
519 | 0 | return NULL; |
520 | 0 | } |
521 | 0 | } |
522 | 0 | } |
523 | 0 | return dst; |
524 | 0 | } |