/src/cpython/Python/hashtable.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* The implementation of the hash table (_Py_hashtable_t) is based on the |
2 | | cfuhash project: |
3 | | http://sourceforge.net/projects/libcfu/ |
4 | | |
5 | | Copyright of cfuhash: |
6 | | ---------------------------------- |
7 | | Creation date: 2005-06-24 21:22:40 |
8 | | Authors: Don |
9 | | Change log: |
10 | | |
11 | | Copyright (c) 2005 Don Owens |
12 | | All rights reserved. |
13 | | |
14 | | This code is released under the BSD license: |
15 | | |
16 | | Redistribution and use in source and binary forms, with or without |
17 | | modification, are permitted provided that the following conditions |
18 | | are met: |
19 | | |
20 | | * Redistributions of source code must retain the above copyright |
21 | | notice, this list of conditions and the following disclaimer. |
22 | | |
23 | | * Redistributions in binary form must reproduce the above |
24 | | copyright notice, this list of conditions and the following |
25 | | disclaimer in the documentation and/or other materials provided |
26 | | with the distribution. |
27 | | |
28 | | * Neither the name of the author nor the names of its |
29 | | contributors may be used to endorse or promote products derived |
30 | | from this software without specific prior written permission. |
31 | | |
32 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
33 | | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
34 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
35 | | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 | | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 | | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
38 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
39 | | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
40 | | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
41 | | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
42 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
43 | | OF THE POSSIBILITY OF SUCH DAMAGE. |
44 | | ---------------------------------- |
45 | | */ |
46 | | |
47 | | #include "Python.h" |
48 | | #include "pycore_hashtable.h" // export _Py_hashtable_new() |
49 | | #include "pycore_pyhash.h" // _Py_HashPointerRaw() |
50 | | |
51 | 1.47k | #define HASHTABLE_MIN_SIZE 16 |
52 | 53.3k | #define HASHTABLE_HIGH 0.50 |
53 | 1.06k | #define HASHTABLE_LOW 0.10 |
54 | 1.06k | #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) |
55 | | |
56 | | #define BUCKETS_HEAD(SLIST) \ |
57 | 168k | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) |
58 | | #define TABLE_HEAD(HT, BUCKET) \ |
59 | 3.24M | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) |
60 | | #define ENTRY_NEXT(ENTRY) \ |
61 | 1.97M | ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) |
62 | | |
63 | | /* Forward declaration */ |
64 | | static int hashtable_rehash(_Py_hashtable_t *ht); |
65 | | |
66 | | static void |
67 | | _Py_slist_init(_Py_slist_t *list) |
68 | 0 | { |
69 | 0 | list->head = NULL; |
70 | 0 | } |
71 | | |
72 | | |
73 | | static void |
74 | | _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) |
75 | 136k | { |
76 | 136k | item->next = list->head; |
77 | 136k | list->head = item; |
78 | 136k | } |
79 | | |
80 | | |
81 | | static void |
82 | | _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, |
83 | | _Py_slist_item_t *item) |
84 | 0 | { |
85 | 0 | if (previous != NULL) |
86 | 0 | previous->next = item->next; |
87 | 0 | else |
88 | 0 | list->head = item->next; |
89 | 0 | } |
90 | | |
91 | | |
92 | | Py_uhash_t |
93 | | _Py_hashtable_hash_ptr(const void *key) |
94 | 133k | { |
95 | 133k | return (Py_uhash_t)_Py_HashPointerRaw(key); |
96 | 133k | } |
97 | | |
98 | | |
99 | | int |
100 | | _Py_hashtable_compare_direct(const void *key1, const void *key2) |
101 | 0 | { |
102 | 0 | return (key1 == key2); |
103 | 0 | } |
104 | | |
105 | | |
106 | | /* makes sure the real size of the buckets array is a power of 2 */ |
107 | | static size_t |
108 | | round_size(size_t s) |
109 | 1.06k | { |
110 | 1.06k | size_t i; |
111 | 1.06k | if (s < HASHTABLE_MIN_SIZE) |
112 | 0 | return HASHTABLE_MIN_SIZE; |
113 | 1.06k | i = 1; |
114 | 8.55k | while (i < s) |
115 | 7.49k | i <<= 1; |
116 | 1.06k | return i; |
117 | 1.06k | } |
118 | | |
119 | | |
120 | | size_t |
121 | | _Py_hashtable_size(const _Py_hashtable_t *ht) |
122 | 0 | { |
123 | 0 | size_t size = sizeof(_Py_hashtable_t); |
124 | | /* buckets */ |
125 | 0 | size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *); |
126 | | /* entries */ |
127 | 0 | size += ht->nentries * sizeof(_Py_hashtable_entry_t); |
128 | 0 | return size; |
129 | 0 | } |
130 | | |
131 | | |
132 | | size_t |
133 | | _Py_hashtable_len(const _Py_hashtable_t *ht) |
134 | 0 | { |
135 | 0 | return ht->nentries; |
136 | 0 | } |
137 | | |
138 | | |
139 | | _Py_hashtable_entry_t * |
140 | | _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key) |
141 | 3.03M | { |
142 | 3.03M | Py_uhash_t key_hash = ht->hash_func(key); |
143 | 3.03M | size_t index = key_hash & (ht->nbuckets - 1); |
144 | 3.03M | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
145 | 4.85M | while (1) { |
146 | 4.85M | if (entry == NULL) { |
147 | 2.78M | return NULL; |
148 | 2.78M | } |
149 | 2.06M | if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) { |
150 | 251k | break; |
151 | 251k | } |
152 | 1.81M | entry = ENTRY_NEXT(entry); |
153 | 1.81M | } |
154 | 251k | return entry; |
155 | 3.03M | } |
156 | | |
157 | | |
158 | | // Specialized for: |
159 | | // hash_func == _Py_hashtable_hash_ptr |
160 | | // compare_func == _Py_hashtable_compare_direct |
161 | | static _Py_hashtable_entry_t * |
162 | | _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key) |
163 | 97.5k | { |
164 | 97.5k | Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key); |
165 | 97.5k | size_t index = key_hash & (ht->nbuckets - 1); |
166 | 97.5k | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
167 | 130k | while (1) { |
168 | 130k | if (entry == NULL) { |
169 | 35.7k | return NULL; |
170 | 35.7k | } |
171 | | // Compare directly keys (ignore entry->key_hash) |
172 | 94.9k | if (entry->key == key) { |
173 | 61.7k | break; |
174 | 61.7k | } |
175 | 33.1k | entry = ENTRY_NEXT(entry); |
176 | 33.1k | } |
177 | 61.7k | return entry; |
178 | 97.5k | } |
179 | | |
180 | | |
181 | | void* |
182 | | _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key) |
183 | 0 | { |
184 | 0 | Py_uhash_t key_hash = ht->hash_func(key); |
185 | 0 | size_t index = key_hash & (ht->nbuckets - 1); |
186 | |
|
187 | 0 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
188 | 0 | _Py_hashtable_entry_t *previous = NULL; |
189 | 0 | while (1) { |
190 | 0 | if (entry == NULL) { |
191 | | // not found |
192 | 0 | return NULL; |
193 | 0 | } |
194 | 0 | if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) { |
195 | 0 | break; |
196 | 0 | } |
197 | 0 | previous = entry; |
198 | 0 | entry = ENTRY_NEXT(entry); |
199 | 0 | } |
200 | | |
201 | 0 | _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, |
202 | 0 | (_Py_slist_item_t *)entry); |
203 | 0 | ht->nentries--; |
204 | |
|
205 | 0 | void *value = entry->value; |
206 | 0 | ht->alloc.free(entry); |
207 | |
|
208 | 0 | if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) { |
209 | | // Ignore failure: error cannot be reported to the caller |
210 | 0 | hashtable_rehash(ht); |
211 | 0 | } |
212 | 0 | return value; |
213 | 0 | } |
214 | | |
215 | | |
216 | | int |
217 | | _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value) |
218 | 52.3k | { |
219 | 52.3k | _Py_hashtable_entry_t *entry; |
220 | | |
221 | | #ifndef NDEBUG |
222 | | /* Don't write the assertion on a single line because it is interesting |
223 | | to know the duplicated entry if the assertion failed. The entry can |
224 | | be read using a debugger. */ |
225 | | entry = ht->get_entry_func(ht, key); |
226 | | assert(entry == NULL); |
227 | | #endif |
228 | | |
229 | 52.3k | entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t)); |
230 | 52.3k | if (entry == NULL) { |
231 | | /* memory allocation failed */ |
232 | 0 | return -1; |
233 | 0 | } |
234 | | |
235 | 52.3k | entry->key_hash = ht->hash_func(key); |
236 | 52.3k | entry->key = (void *)key; |
237 | 52.3k | entry->value = value; |
238 | | |
239 | 52.3k | ht->nentries++; |
240 | 52.3k | if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) { |
241 | 1.06k | if (hashtable_rehash(ht) < 0) { |
242 | 0 | ht->nentries--; |
243 | 0 | ht->alloc.free(entry); |
244 | 0 | return -1; |
245 | 0 | } |
246 | 1.06k | } |
247 | | |
248 | 52.3k | size_t index = entry->key_hash & (ht->nbuckets - 1); |
249 | 52.3k | _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); |
250 | 52.3k | return 0; |
251 | 52.3k | } |
252 | | |
253 | | |
254 | | void* |
255 | | _Py_hashtable_get(_Py_hashtable_t *ht, const void *key) |
256 | 3.03M | { |
257 | 3.03M | _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key); |
258 | 3.03M | if (entry != NULL) { |
259 | 251k | return entry->value; |
260 | 251k | } |
261 | 2.78M | else { |
262 | 2.78M | return NULL; |
263 | 2.78M | } |
264 | 3.03M | } |
265 | | |
266 | | |
267 | | int |
268 | | _Py_hashtable_foreach(_Py_hashtable_t *ht, |
269 | | _Py_hashtable_foreach_func func, |
270 | | void *user_data) |
271 | 0 | { |
272 | 0 | for (size_t hv = 0; hv < ht->nbuckets; hv++) { |
273 | 0 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv); |
274 | 0 | while (entry != NULL) { |
275 | 0 | int res = func(ht, entry->key, entry->value, user_data); |
276 | 0 | if (res) { |
277 | 0 | return res; |
278 | 0 | } |
279 | 0 | entry = ENTRY_NEXT(entry); |
280 | 0 | } |
281 | 0 | } |
282 | 0 | return 0; |
283 | 0 | } |
284 | | |
285 | | |
286 | | static int |
287 | | hashtable_rehash(_Py_hashtable_t *ht) |
288 | 1.06k | { |
289 | 1.06k | size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR)); |
290 | 1.06k | if (new_size == ht->nbuckets) { |
291 | 0 | return 0; |
292 | 0 | } |
293 | | |
294 | 1.06k | size_t buckets_size = new_size * sizeof(ht->buckets[0]); |
295 | 1.06k | _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size); |
296 | 1.06k | if (new_buckets == NULL) { |
297 | | /* memory allocation failed */ |
298 | 0 | return -1; |
299 | 0 | } |
300 | 1.06k | memset(new_buckets, 0, buckets_size); |
301 | | |
302 | 169k | for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) { |
303 | 168k | _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]); |
304 | 252k | while (entry != NULL) { |
305 | 84.1k | assert(ht->hash_func(entry->key) == entry->key_hash); |
306 | 84.1k | _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); |
307 | 84.1k | size_t entry_index = entry->key_hash & (new_size - 1); |
308 | | |
309 | 84.1k | _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry); |
310 | | |
311 | 84.1k | entry = next; |
312 | 84.1k | } |
313 | 168k | } |
314 | | |
315 | 1.06k | ht->alloc.free(ht->buckets); |
316 | 1.06k | ht->nbuckets = new_size; |
317 | 1.06k | ht->buckets = new_buckets; |
318 | 1.06k | return 0; |
319 | 1.06k | } |
320 | | |
321 | | |
322 | | _Py_hashtable_t * |
323 | | _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func, |
324 | | _Py_hashtable_compare_func compare_func, |
325 | | _Py_hashtable_destroy_func key_destroy_func, |
326 | | _Py_hashtable_destroy_func value_destroy_func, |
327 | | _Py_hashtable_allocator_t *allocator) |
328 | 409 | { |
329 | 409 | _Py_hashtable_allocator_t alloc; |
330 | 409 | if (allocator == NULL) { |
331 | 313 | alloc.malloc = PyMem_Malloc; |
332 | 313 | alloc.free = PyMem_Free; |
333 | 313 | } |
334 | 96 | else { |
335 | 96 | alloc = *allocator; |
336 | 96 | } |
337 | | |
338 | 409 | _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); |
339 | 409 | if (ht == NULL) { |
340 | 0 | return ht; |
341 | 0 | } |
342 | | |
343 | 409 | ht->nbuckets = HASHTABLE_MIN_SIZE; |
344 | 409 | ht->nentries = 0; |
345 | | |
346 | 409 | size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]); |
347 | 409 | ht->buckets = alloc.malloc(buckets_size); |
348 | 409 | if (ht->buckets == NULL) { |
349 | 0 | alloc.free(ht); |
350 | 0 | return NULL; |
351 | 0 | } |
352 | 409 | memset(ht->buckets, 0, buckets_size); |
353 | | |
354 | 409 | ht->get_entry_func = _Py_hashtable_get_entry_generic; |
355 | 409 | ht->hash_func = hash_func; |
356 | 409 | ht->compare_func = compare_func; |
357 | 409 | ht->key_destroy_func = key_destroy_func; |
358 | 409 | ht->value_destroy_func = value_destroy_func; |
359 | 409 | ht->alloc = alloc; |
360 | 409 | if (ht->hash_func == _Py_hashtable_hash_ptr |
361 | 409 | && ht->compare_func == _Py_hashtable_compare_direct) |
362 | 327 | { |
363 | 327 | ht->get_entry_func = _Py_hashtable_get_entry_ptr; |
364 | 327 | } |
365 | 409 | return ht; |
366 | 409 | } |
367 | | |
368 | | |
369 | | _Py_hashtable_t * |
370 | | _Py_hashtable_new(_Py_hashtable_hash_func hash_func, |
371 | | _Py_hashtable_compare_func compare_func) |
372 | 0 | { |
373 | 0 | return _Py_hashtable_new_full(hash_func, compare_func, |
374 | 0 | NULL, NULL, NULL); |
375 | 0 | } |
376 | | |
377 | | |
378 | | static void |
379 | | _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry) |
380 | 35.7k | { |
381 | 35.7k | if (ht->key_destroy_func) { |
382 | 35.7k | ht->key_destroy_func(entry->key); |
383 | 35.7k | } |
384 | 35.7k | if (ht->value_destroy_func) { |
385 | 0 | ht->value_destroy_func(entry->value); |
386 | 0 | } |
387 | 35.7k | ht->alloc.free(entry); |
388 | 35.7k | } |
389 | | |
390 | | |
391 | | void |
392 | | _Py_hashtable_clear(_Py_hashtable_t *ht) |
393 | 0 | { |
394 | 0 | for (size_t i=0; i < ht->nbuckets; i++) { |
395 | 0 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); |
396 | 0 | while (entry != NULL) { |
397 | 0 | _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); |
398 | 0 | _Py_hashtable_destroy_entry(ht, entry); |
399 | 0 | entry = next; |
400 | 0 | } |
401 | 0 | _Py_slist_init(&ht->buckets[i]); |
402 | 0 | } |
403 | 0 | ht->nentries = 0; |
404 | | // Ignore failure: clear function is not expected to fail |
405 | | // because of a memory allocation failure. |
406 | 0 | (void)hashtable_rehash(ht); |
407 | 0 | } |
408 | | |
409 | | |
410 | | void |
411 | | _Py_hashtable_destroy(_Py_hashtable_t *ht) |
412 | 311 | { |
413 | 108k | for (size_t i = 0; i < ht->nbuckets; i++) { |
414 | 107k | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); |
415 | 143k | while (entry) { |
416 | 35.7k | _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry); |
417 | 35.7k | _Py_hashtable_destroy_entry(ht, entry); |
418 | 35.7k | entry = entry_next; |
419 | 35.7k | } |
420 | 107k | } |
421 | | |
422 | 311 | ht->alloc.free(ht->buckets); |
423 | 311 | ht->alloc.free(ht); |
424 | 311 | } |