Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/hashtable.c
Line
Count
Source
1
/* The implementation of the hash table (_Py_hashtable_t) is based on the
2
   cfuhash project:
3
   http://sourceforge.net/projects/libcfu/
4
5
   Copyright of cfuhash:
6
   ----------------------------------
7
   Creation date: 2005-06-24 21:22:40
8
   Authors: Don
9
   Change log:
10
11
   Copyright (c) 2005 Don Owens
12
   All rights reserved.
13
14
   This code is released under the BSD license:
15
16
   Redistribution and use in source and binary forms, with or without
17
   modification, are permitted provided that the following conditions
18
   are met:
19
20
     * Redistributions of source code must retain the above copyright
21
       notice, this list of conditions and the following disclaimer.
22
23
     * Redistributions in binary form must reproduce the above
24
       copyright notice, this list of conditions and the following
25
       disclaimer in the documentation and/or other materials provided
26
       with the distribution.
27
28
     * Neither the name of the author nor the names of its
29
       contributors may be used to endorse or promote products derived
30
       from this software without specific prior written permission.
31
32
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35
   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36
   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37
   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43
   OF THE POSSIBILITY OF SUCH DAMAGE.
44
   ----------------------------------
45
*/
46
47
#include "Python.h"
48
#include "pycore_hashtable.h"     // export _Py_hashtable_new()
49
#include "pycore_pyhash.h"        // _Py_HashPointerRaw()
50
51
7.64k
#define HASHTABLE_MIN_SIZE 16
52
94.1k
#define HASHTABLE_HIGH 0.50
53
1.83k
#define HASHTABLE_LOW 0.10
54
1.83k
#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
55
56
#define BUCKETS_HEAD(SLIST) \
57
244k
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
58
#define TABLE_HEAD(HT, BUCKET) \
59
6.24M
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
60
#define ENTRY_NEXT(ENTRY) \
61
1.27M
        ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
62
63
/* Forward declaration */
64
static int hashtable_rehash(_Py_hashtable_t *ht);
65
66
static void
67
_Py_slist_init(_Py_slist_t *list)
68
0
{
69
0
    list->head = NULL;
70
0
}
71
72
73
static void
74
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75
214k
{
76
214k
    item->next = list->head;
77
214k
    list->head = item;
78
214k
}
79
80
81
static void
82
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
83
                 _Py_slist_item_t *item)
84
0
{
85
0
    if (previous != NULL)
86
0
        previous->next = item->next;
87
0
    else
88
0
        list->head = item->next;
89
0
}
90
91
92
Py_uhash_t
93
_Py_hashtable_hash_ptr(const void *key)
94
142k
{
95
142k
    return (Py_uhash_t)_Py_HashPointerRaw(key);
96
142k
}
97
98
99
int
100
_Py_hashtable_compare_direct(const void *key1, const void *key2)
101
0
{
102
0
    return (key1 == key2);
103
0
}
104
105
106
/* makes sure the real size of the buckets array is a power of 2 */
107
static size_t
108
round_size(size_t s)
109
1.83k
{
110
1.83k
    size_t i;
111
1.83k
    if (s < HASHTABLE_MIN_SIZE)
112
0
        return HASHTABLE_MIN_SIZE;
113
1.83k
    i = 1;
114
13.7k
    while (i < s)
115
11.8k
        i <<= 1;
116
1.83k
    return i;
117
1.83k
}
118
119
120
size_t
121
_Py_hashtable_size(const _Py_hashtable_t *ht)
122
0
{
123
0
    size_t size = sizeof(_Py_hashtable_t);
124
    /* buckets */
125
0
    size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
126
    /* entries */
127
0
    size += ht->nentries * sizeof(_Py_hashtable_entry_t);
128
0
    return size;
129
0
}
130
131
132
size_t
133
_Py_hashtable_len(const _Py_hashtable_t *ht)
134
0
{
135
0
    return ht->nentries;
136
0
}
137
138
139
_Py_hashtable_entry_t *
140
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
141
5.97M
{
142
5.97M
    Py_uhash_t key_hash = ht->hash_func(key);
143
5.97M
    size_t index = key_hash & (ht->nbuckets - 1);
144
5.97M
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
145
7.05M
    while (1) {
146
7.05M
        if (entry == NULL) {
147
5.40M
            return NULL;
148
5.40M
        }
149
1.64M
        if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
150
564k
            break;
151
564k
        }
152
1.07M
        entry = ENTRY_NEXT(entry);
153
1.07M
    }
154
564k
    return entry;
155
5.97M
}
156
157
158
// Specialized for:
159
// hash_func == _Py_hashtable_hash_ptr
160
// compare_func == _Py_hashtable_compare_direct
161
static _Py_hashtable_entry_t *
162
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
163
90.4k
{
164
90.4k
    Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
165
90.4k
    size_t index = key_hash & (ht->nbuckets - 1);
166
90.4k
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
167
117k
    while (1) {
168
117k
        if (entry == NULL) {
169
51.7k
            return NULL;
170
51.7k
        }
171
        // Compare directly keys (ignore entry->key_hash)
172
65.3k
        if (entry->key == key) {
173
38.7k
            break;
174
38.7k
        }
175
26.5k
        entry = ENTRY_NEXT(entry);
176
26.5k
    }
177
38.7k
    return entry;
178
90.4k
}
179
180
181
void*
182
_Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
183
0
{
184
0
    Py_uhash_t key_hash = ht->hash_func(key);
185
0
    size_t index = key_hash & (ht->nbuckets - 1);
186
187
0
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
188
0
    _Py_hashtable_entry_t *previous = NULL;
189
0
    while (1) {
190
0
        if (entry == NULL) {
191
            // not found
192
0
            return NULL;
193
0
        }
194
0
        if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
195
0
            break;
196
0
        }
197
0
        previous = entry;
198
0
        entry = ENTRY_NEXT(entry);
199
0
    }
200
201
0
    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
202
0
                     (_Py_slist_item_t *)entry);
203
0
    ht->nentries--;
204
205
0
    void *value = entry->value;
206
0
    ht->alloc.free(entry);
207
208
0
    if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
209
        // Ignore failure: error cannot be reported to the caller
210
0
        hashtable_rehash(ht);
211
0
    }
212
0
    return value;
213
0
}
214
215
216
int
217
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
218
92.2k
{
219
92.2k
    _Py_hashtable_entry_t *entry;
220
221
#ifndef NDEBUG
222
    /* Don't write the assertion on a single line because it is interesting
223
       to know the duplicated entry if the assertion failed. The entry can
224
       be read using a debugger. */
225
    entry = ht->get_entry_func(ht, key);
226
    assert(entry == NULL);
227
#endif
228
229
92.2k
    entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
230
92.2k
    if (entry == NULL) {
231
        /* memory allocation failed */
232
0
        return -1;
233
0
    }
234
235
92.2k
    entry->key_hash = ht->hash_func(key);
236
92.2k
    entry->key = (void *)key;
237
92.2k
    entry->value = value;
238
239
92.2k
    ht->nentries++;
240
92.2k
    if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
241
1.83k
        if (hashtable_rehash(ht) < 0) {
242
0
            ht->nentries--;
243
0
            ht->alloc.free(entry);
244
0
            return -1;
245
0
        }
246
1.83k
    }
247
248
92.2k
    size_t index = entry->key_hash & (ht->nbuckets - 1);
249
92.2k
    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
250
92.2k
    return 0;
251
92.2k
}
252
253
254
void*
255
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
256
5.97M
{
257
5.97M
    _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
258
5.97M
    if (entry != NULL) {
259
564k
        return entry->value;
260
564k
    }
261
5.40M
    else {
262
5.40M
        return NULL;
263
5.40M
    }
264
5.97M
}
265
266
267
int
268
_Py_hashtable_foreach(_Py_hashtable_t *ht,
269
                      _Py_hashtable_foreach_func func,
270
                      void *user_data)
271
0
{
272
0
    for (size_t hv = 0; hv < ht->nbuckets; hv++) {
273
0
        _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
274
0
        while (entry != NULL) {
275
0
            int res = func(ht, entry->key, entry->value, user_data);
276
0
            if (res) {
277
0
                return res;
278
0
            }
279
0
            entry = ENTRY_NEXT(entry);
280
0
        }
281
0
    }
282
0
    return 0;
283
0
}
284
285
286
static int
287
hashtable_rehash(_Py_hashtable_t *ht)
288
1.83k
{
289
1.83k
    size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
290
1.83k
    if (new_size == ht->nbuckets) {
291
0
        return 0;
292
0
    }
293
294
1.83k
    size_t buckets_size = new_size * sizeof(ht->buckets[0]);
295
1.83k
    _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
296
1.83k
    if (new_buckets == NULL) {
297
        /* memory allocation failed */
298
0
        return -1;
299
0
    }
300
1.83k
    memset(new_buckets, 0, buckets_size);
301
302
246k
    for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
303
244k
        _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
304
366k
        while (entry != NULL) {
305
122k
            assert(ht->hash_func(entry->key) == entry->key_hash);
306
122k
            _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
307
122k
            size_t entry_index = entry->key_hash & (new_size - 1);
308
309
122k
            _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
310
311
122k
            entry = next;
312
122k
        }
313
244k
    }
314
315
1.83k
    ht->alloc.free(ht->buckets);
316
1.83k
    ht->nbuckets = new_size;
317
1.83k
    ht->buckets = new_buckets;
318
1.83k
    return 0;
319
1.83k
}
320
321
322
_Py_hashtable_t *
323
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
324
                       _Py_hashtable_compare_func compare_func,
325
                       _Py_hashtable_destroy_func key_destroy_func,
326
                       _Py_hashtable_destroy_func value_destroy_func,
327
                       _Py_hashtable_allocator_t *allocator)
328
5.80k
{
329
5.80k
    _Py_hashtable_allocator_t alloc;
330
5.80k
    if (allocator == NULL) {
331
5.59k
        alloc.malloc = PyMem_Malloc;
332
5.59k
        alloc.free = PyMem_Free;
333
5.59k
    }
334
216
    else {
335
216
        alloc = *allocator;
336
216
    }
337
338
5.80k
    _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
339
5.80k
    if (ht == NULL) {
340
0
        return ht;
341
0
    }
342
343
5.80k
    ht->nbuckets = HASHTABLE_MIN_SIZE;
344
5.80k
    ht->nentries = 0;
345
346
5.80k
    size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
347
5.80k
    ht->buckets = alloc.malloc(buckets_size);
348
5.80k
    if (ht->buckets == NULL) {
349
0
        alloc.free(ht);
350
0
        return NULL;
351
0
    }
352
5.80k
    memset(ht->buckets, 0, buckets_size);
353
354
5.80k
    ht->get_entry_func = _Py_hashtable_get_entry_generic;
355
5.80k
    ht->hash_func = hash_func;
356
5.80k
    ht->compare_func = compare_func;
357
5.80k
    ht->key_destroy_func = key_destroy_func;
358
5.80k
    ht->value_destroy_func = value_destroy_func;
359
5.80k
    ht->alloc = alloc;
360
5.80k
    if (ht->hash_func == _Py_hashtable_hash_ptr
361
5.62k
        && ht->compare_func == _Py_hashtable_compare_direct)
362
5.62k
    {
363
5.62k
        ht->get_entry_func = _Py_hashtable_get_entry_ptr;
364
5.62k
    }
365
5.80k
    return ht;
366
5.80k
}
367
368
369
_Py_hashtable_t *
370
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,
371
                  _Py_hashtable_compare_func compare_func)
372
5.34k
{
373
5.34k
    return _Py_hashtable_new_full(hash_func, compare_func,
374
5.34k
                                  NULL, NULL, NULL);
375
5.34k
}
376
377
378
static void
379
_Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
380
51.7k
{
381
51.7k
    if (ht->key_destroy_func) {
382
21.3k
        ht->key_destroy_func(entry->key);
383
21.3k
    }
384
51.7k
    if (ht->value_destroy_func) {
385
0
        ht->value_destroy_func(entry->value);
386
0
    }
387
51.7k
    ht->alloc.free(entry);
388
51.7k
}
389
390
391
void
392
_Py_hashtable_clear(_Py_hashtable_t *ht)
393
0
{
394
0
    for (size_t i=0; i < ht->nbuckets; i++) {
395
0
        _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
396
0
        while (entry != NULL) {
397
0
            _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
398
0
            _Py_hashtable_destroy_entry(ht, entry);
399
0
            entry = next;
400
0
        }
401
0
        _Py_slist_init(&ht->buckets[i]);
402
0
    }
403
0
    ht->nentries = 0;
404
    // Ignore failure: clear function is not expected to fail
405
    // because of a memory allocation failure.
406
0
    (void)hashtable_rehash(ht);
407
0
}
408
409
410
void
411
_Py_hashtable_destroy(_Py_hashtable_t *ht)
412
5.58k
{
413
192k
    for (size_t i = 0; i < ht->nbuckets; i++) {
414
186k
        _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
415
238k
        while (entry) {
416
51.7k
            _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
417
51.7k
            _Py_hashtable_destroy_entry(ht, entry);
418
51.7k
            entry = entry_next;
419
51.7k
        }
420
186k
    }
421
422
5.58k
    ht->alloc.free(ht->buckets);
423
5.58k
    ht->alloc.free(ht);
424
5.58k
}