Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Modules/hashtable.c
Line
Count
Source (jump to first uncovered line)
1
/* The implementation of the hash table (_Py_hashtable_t) is based on the
2
   cfuhash project:
3
   http://sourceforge.net/projects/libcfu/
4
5
   Copyright of cfuhash:
6
   ----------------------------------
7
   Creation date: 2005-06-24 21:22:40
8
   Authors: Don
9
   Change log:
10
11
   Copyright (c) 2005 Don Owens
12
   All rights reserved.
13
14
   This code is released under the BSD license:
15
16
   Redistribution and use in source and binary forms, with or without
17
   modification, are permitted provided that the following conditions
18
   are met:
19
20
     * Redistributions of source code must retain the above copyright
21
       notice, this list of conditions and the following disclaimer.
22
23
     * Redistributions in binary form must reproduce the above
24
       copyright notice, this list of conditions and the following
25
       disclaimer in the documentation and/or other materials provided
26
       with the distribution.
27
28
     * Neither the name of the author nor the names of its
29
       contributors may be used to endorse or promote products derived
30
       from this software without specific prior written permission.
31
32
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35
   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36
   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37
   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43
   OF THE POSSIBILITY OF SUCH DAMAGE.
44
   ----------------------------------
45
*/
46
47
#include "Python.h"
48
#include "hashtable.h"
49
50
0
#define HASHTABLE_MIN_SIZE 16
51
0
#define HASHTABLE_HIGH 0.50
52
0
#define HASHTABLE_LOW 0.10
53
0
#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55
#define BUCKETS_HEAD(SLIST) \
56
0
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57
#define TABLE_HEAD(HT, BUCKET) \
58
0
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59
#define ENTRY_NEXT(ENTRY) \
60
0
        ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61
#define HASHTABLE_ITEM_SIZE(HT) \
62
0
        (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
63
64
#define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
65
0
    do { \
66
0
        assert((DATA_SIZE) == (TABLE)->data_size); \
67
0
        memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
68
0
                  (DATA_SIZE)); \
69
0
    } while (0)
70
71
#define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
72
0
    do { \
73
0
        assert((DATA_SIZE) == (TABLE)->data_size); \
74
0
        memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
75
0
                  (PDATA), (DATA_SIZE)); \
76
0
    } while (0)
77
78
/* Forward declaration */
79
static void hashtable_rehash(_Py_hashtable_t *ht);
80
81
static void
82
_Py_slist_init(_Py_slist_t *list)
83
0
{
84
0
    list->head = NULL;
85
0
}
86
87
88
static void
89
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
90
0
{
91
0
    item->next = list->head;
92
0
    list->head = item;
93
0
}
94
95
96
static void
97
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
98
                 _Py_slist_item_t *item)
99
0
{
100
0
    if (previous != NULL)
101
0
        previous->next = item->next;
102
0
    else
103
0
        list->head = item->next;
104
0
}
105
106
107
Py_uhash_t
108
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109
0
{
110
0
    void *key;
111
112
0
    _Py_HASHTABLE_READ_KEY(ht, pkey, key);
113
0
    return (Py_uhash_t)_Py_HashPointer(key);
114
0
}
115
116
117
int
118
_Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
119
                             const _Py_hashtable_entry_t *entry)
120
0
{
121
0
    const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
122
0
    return (memcmp(pkey, pkey2, ht->key_size) == 0);
123
0
}
124
125
126
/* makes sure the real size of the buckets array is a power of 2 */
127
static size_t
128
round_size(size_t s)
129
0
{
130
0
    size_t i;
131
0
    if (s < HASHTABLE_MIN_SIZE)
132
0
        return HASHTABLE_MIN_SIZE;
133
0
    i = 1;
134
0
    while (i < s)
135
0
        i <<= 1;
136
0
    return i;
137
0
}
138
139
140
_Py_hashtable_t *
141
_Py_hashtable_new_full(size_t key_size, size_t data_size,
142
                       size_t init_size,
143
                       _Py_hashtable_hash_func hash_func,
144
                       _Py_hashtable_compare_func compare_func,
145
                       _Py_hashtable_allocator_t *allocator)
146
0
{
147
0
    _Py_hashtable_t *ht;
148
0
    size_t buckets_size;
149
0
    _Py_hashtable_allocator_t alloc;
150
151
0
    if (allocator == NULL) {
152
0
        alloc.malloc = PyMem_RawMalloc;
153
0
        alloc.free = PyMem_RawFree;
154
0
    }
155
0
    else
156
0
        alloc = *allocator;
157
158
0
    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
159
0
    if (ht == NULL)
160
0
        return ht;
161
162
0
    ht->num_buckets = round_size(init_size);
163
0
    ht->entries = 0;
164
0
    ht->key_size = key_size;
165
0
    ht->data_size = data_size;
166
167
0
    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
168
0
    ht->buckets = alloc.malloc(buckets_size);
169
0
    if (ht->buckets == NULL) {
170
0
        alloc.free(ht);
171
0
        return NULL;
172
0
    }
173
0
    memset(ht->buckets, 0, buckets_size);
174
175
0
    ht->hash_func = hash_func;
176
0
    ht->compare_func = compare_func;
177
0
    ht->alloc = alloc;
178
0
    return ht;
179
0
}
180
181
182
_Py_hashtable_t *
183
_Py_hashtable_new(size_t key_size, size_t data_size,
184
                  _Py_hashtable_hash_func hash_func,
185
                  _Py_hashtable_compare_func compare_func)
186
0
{
187
0
    return _Py_hashtable_new_full(key_size, data_size,
188
0
                                  HASHTABLE_MIN_SIZE,
189
0
                                  hash_func, compare_func,
190
0
                                  NULL);
191
0
}
192
193
194
size_t
195
_Py_hashtable_size(_Py_hashtable_t *ht)
196
0
{
197
0
    size_t size;
198
199
0
    size = sizeof(_Py_hashtable_t);
200
201
    /* buckets */
202
0
    size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
203
204
    /* entries */
205
0
    size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
206
207
0
    return size;
208
0
}
209
210
211
#ifdef Py_DEBUG
212
void
213
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
214
{
215
    size_t size;
216
    size_t chain_len, max_chain_len, total_chain_len, nchains;
217
    _Py_hashtable_entry_t *entry;
218
    size_t hv;
219
    double load;
220
221
    size = _Py_hashtable_size(ht);
222
223
    load = (double)ht->entries / ht->num_buckets;
224
225
    max_chain_len = 0;
226
    total_chain_len = 0;
227
    nchains = 0;
228
    for (hv = 0; hv < ht->num_buckets; hv++) {
229
        entry = TABLE_HEAD(ht, hv);
230
        if (entry != NULL) {
231
            chain_len = 0;
232
            for (; entry; entry = ENTRY_NEXT(entry)) {
233
                chain_len++;
234
            }
235
            if (chain_len > max_chain_len)
236
                max_chain_len = chain_len;
237
            total_chain_len += chain_len;
238
            nchains++;
239
        }
240
    }
241
    printf("hash table %p: entries=%"
242
           PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
243
           (void *)ht, ht->entries, ht->num_buckets, load * 100.0);
244
    if (nchains)
245
        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
246
    printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
247
           max_chain_len, size / 1024);
248
}
249
#endif
250
251
252
_Py_hashtable_entry_t *
253
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
254
                        size_t key_size, const void *pkey)
255
0
{
256
0
    Py_uhash_t key_hash;
257
0
    size_t index;
258
0
    _Py_hashtable_entry_t *entry;
259
260
0
    assert(key_size == ht->key_size);
261
262
0
    key_hash = ht->hash_func(ht, pkey);
263
0
    index = key_hash & (ht->num_buckets - 1);
264
265
0
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
266
0
        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
267
0
            break;
268
0
    }
269
270
0
    return entry;
271
0
}
272
273
274
static int
275
_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
276
                        void *data, size_t data_size)
277
0
{
278
0
    Py_uhash_t key_hash;
279
0
    size_t index;
280
0
    _Py_hashtable_entry_t *entry, *previous;
281
282
0
    assert(key_size == ht->key_size);
283
284
0
    key_hash = ht->hash_func(ht, pkey);
285
0
    index = key_hash & (ht->num_buckets - 1);
286
287
0
    previous = NULL;
288
0
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
289
0
        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
290
0
            break;
291
0
        previous = entry;
292
0
    }
293
294
0
    if (entry == NULL)
295
0
        return 0;
296
297
0
    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
298
0
                     (_Py_slist_item_t *)entry);
299
0
    ht->entries--;
300
301
0
    if (data != NULL)
302
0
        ENTRY_READ_PDATA(ht, entry, data_size, data);
303
0
    ht->alloc.free(entry);
304
305
0
    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
306
0
        hashtable_rehash(ht);
307
0
    return 1;
308
0
}
309
310
311
int
312
_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
313
                  size_t data_size, const void *data)
314
0
{
315
0
    Py_uhash_t key_hash;
316
0
    size_t index;
317
0
    _Py_hashtable_entry_t *entry;
318
319
0
    assert(key_size == ht->key_size);
320
321
0
    assert(data != NULL || data_size == 0);
322
#ifndef NDEBUG
323
    /* Don't write the assertion on a single line because it is interesting
324
       to know the duplicated entry if the assertion failed. The entry can
325
       be read using a debugger. */
326
    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
327
    assert(entry == NULL);
328
#endif
329
330
0
    key_hash = ht->hash_func(ht, pkey);
331
0
    index = key_hash & (ht->num_buckets - 1);
332
333
0
    entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
334
0
    if (entry == NULL) {
335
        /* memory allocation failed */
336
0
        return -1;
337
0
    }
338
339
0
    entry->key_hash = key_hash;
340
0
    memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
341
0
    if (data)
342
0
        ENTRY_WRITE_PDATA(ht, entry, data_size, data);
343
344
0
    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
345
0
    ht->entries++;
346
347
0
    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
348
0
        hashtable_rehash(ht);
349
0
    return 0;
350
0
}
351
352
353
int
354
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
355
                  size_t data_size, void *data)
356
0
{
357
0
    _Py_hashtable_entry_t *entry;
358
359
0
    assert(data != NULL);
360
361
0
    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
362
0
    if (entry == NULL)
363
0
        return 0;
364
0
    ENTRY_READ_PDATA(ht, entry, data_size, data);
365
0
    return 1;
366
0
}
367
368
369
int
370
_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
371
                  size_t data_size, void *data)
372
0
{
373
0
    assert(data != NULL);
374
0
    return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
375
0
}
376
377
378
/* Code commented since the function is not needed in Python */
379
#if 0
380
void
381
_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
382
{
383
#ifndef NDEBUG
384
    int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
385
    assert(found);
386
#else
387
    (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
388
#endif
389
}
390
#endif
391
392
393
int
394
_Py_hashtable_foreach(_Py_hashtable_t *ht,
395
                      _Py_hashtable_foreach_func func,
396
                      void *arg)
397
0
{
398
0
    _Py_hashtable_entry_t *entry;
399
0
    size_t hv;
400
401
0
    for (hv = 0; hv < ht->num_buckets; hv++) {
402
0
        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
403
0
            int res = func(ht, entry, arg);
404
0
            if (res)
405
0
                return res;
406
0
        }
407
0
    }
408
0
    return 0;
409
0
}
410
411
412
static void
413
hashtable_rehash(_Py_hashtable_t *ht)
414
0
{
415
0
    size_t buckets_size, new_size, bucket;
416
0
    _Py_slist_t *old_buckets = NULL;
417
0
    size_t old_num_buckets;
418
419
0
    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
420
0
    if (new_size == ht->num_buckets)
421
0
        return;
422
423
0
    old_num_buckets = ht->num_buckets;
424
425
0
    buckets_size = new_size * sizeof(ht->buckets[0]);
426
0
    old_buckets = ht->buckets;
427
0
    ht->buckets = ht->alloc.malloc(buckets_size);
428
0
    if (ht->buckets == NULL) {
429
        /* cancel rehash on memory allocation failure */
430
0
        ht->buckets = old_buckets ;
431
        /* memory allocation failed */
432
0
        return;
433
0
    }
434
0
    memset(ht->buckets, 0, buckets_size);
435
436
0
    ht->num_buckets = new_size;
437
438
0
    for (bucket = 0; bucket < old_num_buckets; bucket++) {
439
0
        _Py_hashtable_entry_t *entry, *next;
440
0
        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
441
0
            size_t entry_index;
442
443
444
0
            assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
445
0
            next = ENTRY_NEXT(entry);
446
0
            entry_index = entry->key_hash & (new_size - 1);
447
448
0
            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
449
0
        }
450
0
    }
451
452
0
    ht->alloc.free(old_buckets);
453
0
}
454
455
456
void
457
_Py_hashtable_clear(_Py_hashtable_t *ht)
458
0
{
459
0
    _Py_hashtable_entry_t *entry, *next;
460
0
    size_t i;
461
462
0
    for (i=0; i < ht->num_buckets; i++) {
463
0
        for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
464
0
            next = ENTRY_NEXT(entry);
465
0
            ht->alloc.free(entry);
466
0
        }
467
0
        _Py_slist_init(&ht->buckets[i]);
468
0
    }
469
0
    ht->entries = 0;
470
0
    hashtable_rehash(ht);
471
0
}
472
473
474
void
475
_Py_hashtable_destroy(_Py_hashtable_t *ht)
476
0
{
477
0
    size_t i;
478
479
0
    for (i = 0; i < ht->num_buckets; i++) {
480
0
        _Py_slist_item_t *entry = ht->buckets[i].head;
481
0
        while (entry) {
482
0
            _Py_slist_item_t *entry_next = entry->next;
483
0
            ht->alloc.free(entry);
484
0
            entry = entry_next;
485
0
        }
486
0
    }
487
488
0
    ht->alloc.free(ht->buckets);
489
0
    ht->alloc.free(ht);
490
0
}
491
492
493
_Py_hashtable_t *
494
_Py_hashtable_copy(_Py_hashtable_t *src)
495
0
{
496
0
    const size_t key_size = src->key_size;
497
0
    const size_t data_size = src->data_size;
498
0
    _Py_hashtable_t *dst;
499
0
    _Py_hashtable_entry_t *entry;
500
0
    size_t bucket;
501
0
    int err;
502
503
0
    dst = _Py_hashtable_new_full(key_size, data_size,
504
0
                                 src->num_buckets,
505
0
                                 src->hash_func,
506
0
                                 src->compare_func,
507
0
                                 &src->alloc);
508
0
    if (dst == NULL)
509
0
        return NULL;
510
511
0
    for (bucket=0; bucket < src->num_buckets; bucket++) {
512
0
        entry = TABLE_HEAD(src, bucket);
513
0
        for (; entry; entry = ENTRY_NEXT(entry)) {
514
0
            const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
515
0
            const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
516
0
            err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
517
0
            if (err) {
518
0
                _Py_hashtable_destroy(dst);
519
0
                return NULL;
520
0
            }
521
0
        }
522
0
    }
523
0
    return dst;
524
0
}