Coverage Report

Created: 2025-06-13 06:18

/src/gdal/port/cpl_hash_set.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 *
3
 * Name:     cpl_hash_set.cpp
4
 * Project:  CPL - Common Portability Library
5
 * Purpose:  Hash set functions.
6
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
7
 *
8
 **********************************************************************
9
 * Copyright (c) 2008-2009, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 ****************************************************************************/
13
14
#include "cpl_hash_set.h"
15
16
#include <cstring>
17
18
#include "cpl_conv.h"
19
#include "cpl_error.h"
20
#include "cpl_list.h"
21
22
struct _CPLHashSet
23
{
24
    CPLHashSetHashFunc fnHashFunc;
25
    CPLHashSetEqualFunc fnEqualFunc;
26
    CPLHashSetFreeEltFunc fnFreeEltFunc;
27
    CPLList **tabList;
28
    int nSize;
29
    int nIndiceAllocatedSize;
30
    int nAllocatedSize;
31
    CPLList *psRecyclingList;
32
    int nRecyclingListSize;
33
    bool bRehash;
34
#ifdef HASH_DEBUG
35
    int nCollisions;
36
#endif
37
};
38
39
constexpr int anPrimes[] = {
40
    53,        97,        193,       389,       769,       1543,     3079,
41
    6151,      12289,     24593,     49157,     98317,     196613,   393241,
42
    786433,    1572869,   3145739,   6291469,   12582917,  25165843, 50331653,
43
    100663319, 201326611, 402653189, 805306457, 1610612741};
44
45
/************************************************************************/
46
/*                          CPLHashSetNew()                             */
47
/************************************************************************/
48
49
/**
50
 * Creates a new hash set
51
 *
52
 * The hash function must return a hash value for the elements to insert.
53
 * If fnHashFunc is NULL, CPLHashSetHashPointer will be used.
54
 *
55
 * The equal function must return if two elements are equal.
56
 * If fnEqualFunc is NULL, CPLHashSetEqualPointer will be used.
57
 *
58
 * The free function is used to free elements inserted in the hash set,
59
 * when the hash set is destroyed, when elements are removed or replaced.
60
 * If fnFreeEltFunc is NULL, elements inserted into the hash set will not be
61
 * freed.
62
 *
63
 * @param fnHashFunc hash function. May be NULL.
64
 * @param fnEqualFunc equal function. May be NULL.
65
 * @param fnFreeEltFunc element free function. May be NULL.
66
 *
67
 * @return a new hash set
68
 */
69
70
CPLHashSet *CPLHashSetNew(CPLHashSetHashFunc fnHashFunc,
71
                          CPLHashSetEqualFunc fnEqualFunc,
72
                          CPLHashSetFreeEltFunc fnFreeEltFunc)
73
0
{
74
0
    CPLHashSet *set = static_cast<CPLHashSet *>(CPLMalloc(sizeof(CPLHashSet)));
75
0
    set->fnHashFunc = fnHashFunc ? fnHashFunc : CPLHashSetHashPointer;
76
0
    set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : CPLHashSetEqualPointer;
77
0
    set->fnFreeEltFunc = fnFreeEltFunc;
78
0
    set->nSize = 0;
79
0
    set->tabList = static_cast<CPLList **>(CPLCalloc(sizeof(CPLList *), 53));
80
0
    set->nIndiceAllocatedSize = 0;
81
0
    set->nAllocatedSize = 53;
82
0
    set->psRecyclingList = nullptr;
83
0
    set->nRecyclingListSize = 0;
84
0
    set->bRehash = false;
85
#ifdef HASH_DEBUG
86
    set->nCollisions = 0;
87
#endif
88
0
    return set;
89
0
}
90
91
/************************************************************************/
92
/*                          CPLHashSetSize()                            */
93
/************************************************************************/
94
95
/**
96
 * Returns the number of elements inserted in the hash set
97
 *
98
 * Note: this is not the internal size of the hash set
99
 *
100
 * @param set the hash set
101
 *
102
 * @return the number of elements in the hash set
103
 */
104
105
int CPLHashSetSize(const CPLHashSet *set)
106
0
{
107
0
    CPLAssert(set != nullptr);
108
0
    return set->nSize;
109
0
}
110
111
/************************************************************************/
112
/*                       CPLHashSetGetNewListElt()                      */
113
/************************************************************************/
114
115
static CPLList *CPLHashSetGetNewListElt(CPLHashSet *set)
116
0
{
117
0
    if (set->psRecyclingList)
118
0
    {
119
0
        CPLList *psRet = set->psRecyclingList;
120
0
        psRet->pData = nullptr;
121
0
        set->nRecyclingListSize--;
122
0
        set->psRecyclingList = psRet->psNext;
123
0
        return psRet;
124
0
    }
125
126
0
    return static_cast<CPLList *>(CPLMalloc(sizeof(CPLList)));
127
0
}
128
129
/************************************************************************/
130
/*                       CPLHashSetReturnListElt()                      */
131
/************************************************************************/
132
133
static void CPLHashSetReturnListElt(CPLHashSet *set, CPLList *psList)
134
0
{
135
0
    if (set->nRecyclingListSize < 128)
136
0
    {
137
0
        psList->psNext = set->psRecyclingList;
138
0
        set->psRecyclingList = psList;
139
0
        set->nRecyclingListSize++;
140
0
    }
141
0
    else
142
0
    {
143
0
        CPLFree(psList);
144
0
    }
145
0
}
146
147
/************************************************************************/
148
/*                   CPLHashSetClearInternal()                          */
149
/************************************************************************/
150
151
static void CPLHashSetClearInternal(CPLHashSet *set, bool bFinalize)
152
0
{
153
0
    CPLAssert(set != nullptr);
154
0
    for (int i = 0; i < set->nAllocatedSize; i++)
155
0
    {
156
0
        CPLList *cur = set->tabList[i];
157
0
        while (cur)
158
0
        {
159
0
            if (set->fnFreeEltFunc)
160
0
                set->fnFreeEltFunc(cur->pData);
161
0
            CPLList *psNext = cur->psNext;
162
0
            if (bFinalize)
163
0
                CPLFree(cur);
164
0
            else
165
0
                CPLHashSetReturnListElt(set, cur);
166
0
            cur = psNext;
167
0
        }
168
0
        set->tabList[i] = nullptr;
169
0
    }
170
0
    set->bRehash = false;
171
0
}
172
173
/************************************************************************/
174
/*                        CPLHashSetDestroy()                           */
175
/************************************************************************/
176
177
/**
178
 * Destroys an allocated hash set.
179
 *
180
 * This function also frees the elements if a free function was
181
 * provided at the creation of the hash set.
182
 *
183
 * @param set the hash set
184
 */
185
186
void CPLHashSetDestroy(CPLHashSet *set)
187
0
{
188
0
    CPLHashSetClearInternal(set, true);
189
0
    CPLFree(set->tabList);
190
0
    CPLListDestroy(set->psRecyclingList);
191
0
    CPLFree(set);
192
0
}
193
194
/************************************************************************/
195
/*                        CPLHashSetClear()                             */
196
/************************************************************************/
197
198
/**
199
 * Clear all elements from a hash set.
200
 *
201
 * This function also frees the elements if a free function was
202
 * provided at the creation of the hash set.
203
 *
204
 * @param set the hash set
205
 * @since GDAL 2.1
206
 */
207
208
void CPLHashSetClear(CPLHashSet *set)
209
0
{
210
0
    CPLHashSetClearInternal(set, false);
211
0
    set->tabList = static_cast<CPLList **>(
212
0
        CPLRealloc(set->tabList, sizeof(CPLList *) * 53));
213
0
    set->nIndiceAllocatedSize = 0;
214
0
    set->nAllocatedSize = 53;
215
#ifdef HASH_DEBUG
216
    set->nCollisions = 0;
217
#endif
218
0
    set->nSize = 0;
219
0
}
220
221
/************************************************************************/
222
/*                       CPLHashSetForeach()                            */
223
/************************************************************************/
224
225
/**
226
 * Walk through the hash set and runs the provided function on all the
227
 * elements
228
 *
229
 * This function is provided the user_data argument of CPLHashSetForeach.
230
 * It must return TRUE to go on the walk through the hash set, or FALSE to
231
 * make it stop.
232
 *
233
 * Note : the structure of the hash set must *NOT* be modified during the
234
 * walk.
235
 *
236
 * @param set the hash set.
237
 * @param fnIterFunc the function called on each element.
238
 * @param user_data the user data provided to the function.
239
 */
240
241
void CPLHashSetForeach(CPLHashSet *set, CPLHashSetIterEltFunc fnIterFunc,
242
                       void *user_data)
243
0
{
244
0
    CPLAssert(set != nullptr);
245
0
    if (!fnIterFunc)
246
0
        return;
247
248
0
    for (int i = 0; i < set->nAllocatedSize; i++)
249
0
    {
250
0
        CPLList *cur = set->tabList[i];
251
0
        while (cur)
252
0
        {
253
0
            if (!fnIterFunc(cur->pData, user_data))
254
0
                return;
255
256
0
            cur = cur->psNext;
257
0
        }
258
0
    }
259
0
}
260
261
/************************************************************************/
262
/*                        CPLHashSetRehash()                            */
263
/************************************************************************/
264
265
static void CPLHashSetRehash(CPLHashSet *set)
266
0
{
267
0
    int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize];
268
0
    CPLList **newTabList = static_cast<CPLList **>(
269
0
        CPLCalloc(sizeof(CPLList *), nNewAllocatedSize));
270
#ifdef HASH_DEBUG
271
    CPLDebug("CPLHASH",
272
             "hashSet=%p, nSize=%d, nCollisions=%d, "
273
             "fCollisionRate=%.02f",
274
             set, set->nSize, set->nCollisions,
275
             set->nCollisions * 100.0 / set->nSize);
276
    set->nCollisions = 0;
277
#endif
278
0
    for (int i = 0; i < set->nAllocatedSize; i++)
279
0
    {
280
0
        CPLList *cur = set->tabList[i];
281
0
        while (cur)
282
0
        {
283
0
            const unsigned long nNewHashVal =
284
0
                set->fnHashFunc(cur->pData) % nNewAllocatedSize;
285
#ifdef HASH_DEBUG
286
            if (newTabList[nNewHashVal])
287
                set->nCollisions++;
288
#endif
289
0
            CPLList *psNext = cur->psNext;
290
0
            cur->psNext = newTabList[nNewHashVal];
291
0
            newTabList[nNewHashVal] = cur;
292
0
            cur = psNext;
293
0
        }
294
0
    }
295
0
    CPLFree(set->tabList);
296
0
    set->tabList = newTabList;
297
0
    set->nAllocatedSize = nNewAllocatedSize;
298
0
    set->bRehash = false;
299
0
}
300
301
/************************************************************************/
302
/*                        CPLHashSetFindPtr()                           */
303
/************************************************************************/
304
305
static void **CPLHashSetFindPtr(CPLHashSet *set, const void *elt)
306
0
{
307
0
    const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
308
0
    CPLList *cur = set->tabList[nHashVal];
309
0
    while (cur)
310
0
    {
311
0
        if (set->fnEqualFunc(cur->pData, elt))
312
0
            return &cur->pData;
313
0
        cur = cur->psNext;
314
0
    }
315
0
    return nullptr;
316
0
}
317
318
/************************************************************************/
319
/*                         CPLHashSetInsert()                           */
320
/************************************************************************/
321
322
/**
323
 * Inserts an element into a hash set.
324
 *
325
 * If the element was already inserted in the hash set, the previous
326
 * element is replaced by the new element. If a free function was provided,
327
 * it is used to free the previously inserted element
328
 *
329
 * @param set the hash set
330
 * @param elt the new element to insert in the hash set
331
 *
332
 * @return TRUE if the element was not already in the hash set
333
 */
334
335
int CPLHashSetInsert(CPLHashSet *set, void *elt)
336
0
{
337
0
    CPLAssert(set != nullptr);
338
0
    void **pElt = CPLHashSetFindPtr(set, elt);
339
0
    if (pElt)
340
0
    {
341
0
        if (set->fnFreeEltFunc)
342
0
            set->fnFreeEltFunc(*pElt);
343
344
0
        *pElt = elt;
345
0
        return FALSE;
346
0
    }
347
348
0
    if (set->nSize >= 2 * set->nAllocatedSize / 3 ||
349
0
        (set->bRehash && set->nIndiceAllocatedSize > 0 &&
350
0
         set->nSize <= set->nAllocatedSize / 2))
351
0
    {
352
0
        set->nIndiceAllocatedSize++;
353
0
        CPLHashSetRehash(set);
354
0
    }
355
356
0
    const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
357
#ifdef HASH_DEBUG
358
    if (set->tabList[nHashVal])
359
        set->nCollisions++;
360
#endif
361
362
0
    CPLList *new_elt = CPLHashSetGetNewListElt(set);
363
0
    new_elt->pData = elt;
364
0
    new_elt->psNext = set->tabList[nHashVal];
365
0
    set->tabList[nHashVal] = new_elt;
366
0
    set->nSize++;
367
368
0
    return TRUE;
369
0
}
370
371
/************************************************************************/
372
/*                        CPLHashSetLookup()                            */
373
/************************************************************************/
374
375
/**
376
 * Returns the element found in the hash set corresponding to the element to
377
 * look up The element must not be modified.
378
 *
379
 * @param set the hash set
380
 * @param elt the element to look up in the hash set
381
 *
382
 * @return the element found in the hash set or NULL
383
 */
384
385
void *CPLHashSetLookup(CPLHashSet *set, const void *elt)
386
0
{
387
0
    CPLAssert(set != nullptr);
388
0
    void **pElt = CPLHashSetFindPtr(set, elt);
389
0
    if (pElt)
390
0
        return *pElt;
391
392
0
    return nullptr;
393
0
}
394
395
/************************************************************************/
396
/*                     CPLHashSetRemoveInternal()                       */
397
/************************************************************************/
398
399
static bool CPLHashSetRemoveInternal(CPLHashSet *set, const void *elt,
400
                                     bool bDeferRehash)
401
0
{
402
0
    CPLAssert(set != nullptr);
403
0
    if (set->nIndiceAllocatedSize > 0 && set->nSize <= set->nAllocatedSize / 2)
404
0
    {
405
0
        set->nIndiceAllocatedSize--;
406
0
        if (bDeferRehash)
407
0
            set->bRehash = true;
408
0
        else
409
0
            CPLHashSetRehash(set);
410
0
    }
411
412
0
    int nHashVal = static_cast<int>(set->fnHashFunc(elt) % set->nAllocatedSize);
413
0
    CPLList *cur = set->tabList[nHashVal];
414
0
    CPLList *prev = nullptr;
415
0
    while (cur)
416
0
    {
417
0
        if (set->fnEqualFunc(cur->pData, elt))
418
0
        {
419
0
            if (prev)
420
0
                prev->psNext = cur->psNext;
421
0
            else
422
0
                set->tabList[nHashVal] = cur->psNext;
423
424
0
            if (set->fnFreeEltFunc)
425
0
                set->fnFreeEltFunc(cur->pData);
426
427
0
            CPLHashSetReturnListElt(set, cur);
428
#ifdef HASH_DEBUG
429
            if (set->tabList[nHashVal])
430
                set->nCollisions--;
431
#endif
432
0
            set->nSize--;
433
0
            return true;
434
0
        }
435
0
        prev = cur;
436
0
        cur = cur->psNext;
437
0
    }
438
0
    return false;
439
0
}
440
441
/************************************************************************/
442
/*                         CPLHashSetRemove()                           */
443
/************************************************************************/
444
445
/**
446
 * Removes an element from a hash set
447
 *
448
 * @param set the hash set
449
 * @param elt the new element to remove from the hash set
450
 *
451
 * @return TRUE if the element was in the hash set
452
 */
453
454
int CPLHashSetRemove(CPLHashSet *set, const void *elt)
455
0
{
456
0
    return CPLHashSetRemoveInternal(set, elt, false);
457
0
}
458
459
/************************************************************************/
460
/*                     CPLHashSetRemoveDeferRehash()                    */
461
/************************************************************************/
462
463
/**
464
 * Removes an element from a hash set.
465
 *
466
 * This will defer potential rehashing of the set to later calls to
467
 * CPLHashSetInsert() or CPLHashSetRemove().
468
 *
469
 * @param set the hash set
470
 * @param elt the new element to remove from the hash set
471
 *
472
 * @return TRUE if the element was in the hash set
473
 * @since GDAL 2.1
474
 */
475
476
int CPLHashSetRemoveDeferRehash(CPLHashSet *set, const void *elt)
477
0
{
478
0
    return CPLHashSetRemoveInternal(set, elt, true);
479
0
}
480
481
/************************************************************************/
482
/*                    CPLHashSetHashPointer()                           */
483
/************************************************************************/
484
485
/**
486
 * Hash function for an arbitrary pointer
487
 *
488
 * @param elt the arbitrary pointer to hash
489
 *
490
 * @return the hash value of the pointer
491
 */
492
493
unsigned long CPLHashSetHashPointer(const void *elt)
494
0
{
495
0
    return static_cast<unsigned long>(
496
0
        reinterpret_cast<GUIntptr_t>(const_cast<void *>(elt)));
497
0
}
498
499
/************************************************************************/
500
/*                   CPLHashSetEqualPointer()                           */
501
/************************************************************************/
502
503
/**
504
 * Equality function for arbitrary pointers
505
 *
506
 * @param elt1 the first arbitrary pointer to compare
507
 * @param elt2 the second arbitrary pointer to compare
508
 *
509
 * @return TRUE if the pointers are equal
510
 */
511
512
int CPLHashSetEqualPointer(const void *elt1, const void *elt2)
513
0
{
514
0
    return elt1 == elt2;
515
0
}
516
517
/************************************************************************/
518
/*                        CPLHashSetHashStr()                           */
519
/************************************************************************/
520
521
/**
522
 * Hash function for a zero-terminated string
523
 *
524
 * @param elt the string to hash. May be NULL.
525
 *
526
 * @return the hash value of the string
527
 */
528
529
CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
530
unsigned long CPLHashSetHashStr(const void *elt)
531
0
{
532
0
    if (elt == nullptr)
533
0
        return 0;
534
535
0
    const unsigned char *pszStr = static_cast<const unsigned char *>(elt);
536
0
    unsigned long hash = 0;
537
538
0
    int c = 0;
539
0
    while ((c = *pszStr++) != '\0')
540
0
        hash = c + (hash << 6) + (hash << 16) - hash;
541
542
0
    return hash;
543
0
}
544
545
/************************************************************************/
546
/*                     CPLHashSetEqualStr()                             */
547
/************************************************************************/
548
549
/**
550
 * Equality function for strings
551
 *
552
 * @param elt1 the first string to compare. May be NULL.
553
 * @param elt2 the second string to compare. May be NULL.
554
 *
555
 * @return TRUE if the strings are equal
556
 */
557
558
int CPLHashSetEqualStr(const void *elt1, const void *elt2)
559
0
{
560
0
    const char *pszStr1 = static_cast<const char *>(elt1);
561
0
    const char *pszStr2 = static_cast<const char *>(elt2);
562
563
0
    if (pszStr1 == nullptr && pszStr2 != nullptr)
564
0
        return FALSE;
565
566
0
    if (pszStr1 != nullptr && pszStr2 == nullptr)
567
0
        return FALSE;
568
569
0
    if (pszStr1 == nullptr && pszStr2 == nullptr)
570
0
        return TRUE;
571
572
0
    return strcmp(pszStr1, pszStr2) == 0;
573
0
}