/src/gdal/port/cpl_hash_set.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * |
3 | | * Name: cpl_hash_set.cpp |
4 | | * Project: CPL - Common Portability Library |
5 | | * Purpose: Hash set functions. |
6 | | * Author: Even Rouault, <even dot rouault at spatialys.com> |
7 | | * |
8 | | ********************************************************************** |
9 | | * Copyright (c) 2008-2009, Even Rouault <even dot rouault at spatialys.com> |
10 | | * |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #include "cpl_hash_set.h" |
15 | | |
16 | | #include <cstring> |
17 | | |
18 | | #include "cpl_conv.h" |
19 | | #include "cpl_error.h" |
20 | | #include "cpl_list.h" |
21 | | |
22 | | struct _CPLHashSet |
23 | | { |
24 | | CPLHashSetHashFunc fnHashFunc; |
25 | | CPLHashSetEqualFunc fnEqualFunc; |
26 | | CPLHashSetFreeEltFunc fnFreeEltFunc; |
27 | | CPLList **tabList; |
28 | | int nSize; |
29 | | int nIndiceAllocatedSize; |
30 | | int nAllocatedSize; |
31 | | CPLList *psRecyclingList; |
32 | | int nRecyclingListSize; |
33 | | bool bRehash; |
34 | | #ifdef HASH_DEBUG |
35 | | int nCollisions; |
36 | | #endif |
37 | | }; |
38 | | |
39 | | constexpr int anPrimes[] = { |
40 | | 53, 97, 193, 389, 769, 1543, 3079, |
41 | | 6151, 12289, 24593, 49157, 98317, 196613, 393241, |
42 | | 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, |
43 | | 100663319, 201326611, 402653189, 805306457, 1610612741}; |
44 | | |
45 | | /************************************************************************/ |
46 | | /* CPLHashSetNew() */ |
47 | | /************************************************************************/ |
48 | | |
49 | | /** |
50 | | * Creates a new hash set |
51 | | * |
52 | | * The hash function must return a hash value for the elements to insert. |
53 | | * If fnHashFunc is NULL, CPLHashSetHashPointer will be used. |
54 | | * |
55 | | * The equal function must return if two elements are equal. |
56 | | * If fnEqualFunc is NULL, CPLHashSetEqualPointer will be used. |
57 | | * |
58 | | * The free function is used to free elements inserted in the hash set, |
59 | | * when the hash set is destroyed, when elements are removed or replaced. |
60 | | * If fnFreeEltFunc is NULL, elements inserted into the hash set will not be |
61 | | * freed. |
62 | | * |
63 | | * @param fnHashFunc hash function. May be NULL. |
64 | | * @param fnEqualFunc equal function. May be NULL. |
65 | | * @param fnFreeEltFunc element free function. May be NULL. |
66 | | * |
67 | | * @return a new hash set |
68 | | */ |
69 | | |
70 | | CPLHashSet *CPLHashSetNew(CPLHashSetHashFunc fnHashFunc, |
71 | | CPLHashSetEqualFunc fnEqualFunc, |
72 | | CPLHashSetFreeEltFunc fnFreeEltFunc) |
73 | 0 | { |
74 | 0 | CPLHashSet *set = static_cast<CPLHashSet *>(CPLMalloc(sizeof(CPLHashSet))); |
75 | 0 | set->fnHashFunc = fnHashFunc ? fnHashFunc : CPLHashSetHashPointer; |
76 | 0 | set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : CPLHashSetEqualPointer; |
77 | 0 | set->fnFreeEltFunc = fnFreeEltFunc; |
78 | 0 | set->nSize = 0; |
79 | 0 | set->tabList = static_cast<CPLList **>(CPLCalloc(sizeof(CPLList *), 53)); |
80 | 0 | set->nIndiceAllocatedSize = 0; |
81 | 0 | set->nAllocatedSize = 53; |
82 | 0 | set->psRecyclingList = nullptr; |
83 | 0 | set->nRecyclingListSize = 0; |
84 | 0 | set->bRehash = false; |
85 | | #ifdef HASH_DEBUG |
86 | | set->nCollisions = 0; |
87 | | #endif |
88 | 0 | return set; |
89 | 0 | } |
90 | | |
91 | | /************************************************************************/ |
92 | | /* CPLHashSetSize() */ |
93 | | /************************************************************************/ |
94 | | |
95 | | /** |
96 | | * Returns the number of elements inserted in the hash set |
97 | | * |
98 | | * Note: this is not the internal size of the hash set |
99 | | * |
100 | | * @param set the hash set |
101 | | * |
102 | | * @return the number of elements in the hash set |
103 | | */ |
104 | | |
105 | | int CPLHashSetSize(const CPLHashSet *set) |
106 | 0 | { |
107 | 0 | CPLAssert(set != nullptr); |
108 | 0 | return set->nSize; |
109 | 0 | } |
110 | | |
111 | | /************************************************************************/ |
112 | | /* CPLHashSetGetNewListElt() */ |
113 | | /************************************************************************/ |
114 | | |
115 | | static CPLList *CPLHashSetGetNewListElt(CPLHashSet *set) |
116 | 0 | { |
117 | 0 | if (set->psRecyclingList) |
118 | 0 | { |
119 | 0 | CPLList *psRet = set->psRecyclingList; |
120 | 0 | psRet->pData = nullptr; |
121 | 0 | set->nRecyclingListSize--; |
122 | 0 | set->psRecyclingList = psRet->psNext; |
123 | 0 | return psRet; |
124 | 0 | } |
125 | | |
126 | 0 | return static_cast<CPLList *>(CPLMalloc(sizeof(CPLList))); |
127 | 0 | } |
128 | | |
129 | | /************************************************************************/ |
130 | | /* CPLHashSetReturnListElt() */ |
131 | | /************************************************************************/ |
132 | | |
133 | | static void CPLHashSetReturnListElt(CPLHashSet *set, CPLList *psList) |
134 | 0 | { |
135 | 0 | if (set->nRecyclingListSize < 128) |
136 | 0 | { |
137 | 0 | psList->psNext = set->psRecyclingList; |
138 | 0 | set->psRecyclingList = psList; |
139 | 0 | set->nRecyclingListSize++; |
140 | 0 | } |
141 | 0 | else |
142 | 0 | { |
143 | 0 | CPLFree(psList); |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | /************************************************************************/ |
148 | | /* CPLHashSetClearInternal() */ |
149 | | /************************************************************************/ |
150 | | |
151 | | static void CPLHashSetClearInternal(CPLHashSet *set, bool bFinalize) |
152 | 0 | { |
153 | 0 | CPLAssert(set != nullptr); |
154 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
155 | 0 | { |
156 | 0 | CPLList *cur = set->tabList[i]; |
157 | 0 | while (cur) |
158 | 0 | { |
159 | 0 | if (set->fnFreeEltFunc) |
160 | 0 | set->fnFreeEltFunc(cur->pData); |
161 | 0 | CPLList *psNext = cur->psNext; |
162 | 0 | if (bFinalize) |
163 | 0 | CPLFree(cur); |
164 | 0 | else |
165 | 0 | CPLHashSetReturnListElt(set, cur); |
166 | 0 | cur = psNext; |
167 | 0 | } |
168 | 0 | set->tabList[i] = nullptr; |
169 | 0 | } |
170 | 0 | set->bRehash = false; |
171 | 0 | } |
172 | | |
173 | | /************************************************************************/ |
174 | | /* CPLHashSetDestroy() */ |
175 | | /************************************************************************/ |
176 | | |
177 | | /** |
178 | | * Destroys an allocated hash set. |
179 | | * |
180 | | * This function also frees the elements if a free function was |
181 | | * provided at the creation of the hash set. |
182 | | * |
183 | | * @param set the hash set |
184 | | */ |
185 | | |
186 | | void CPLHashSetDestroy(CPLHashSet *set) |
187 | 0 | { |
188 | 0 | CPLHashSetClearInternal(set, true); |
189 | 0 | CPLFree(set->tabList); |
190 | 0 | CPLListDestroy(set->psRecyclingList); |
191 | 0 | CPLFree(set); |
192 | 0 | } |
193 | | |
194 | | /************************************************************************/ |
195 | | /* CPLHashSetClear() */ |
196 | | /************************************************************************/ |
197 | | |
198 | | /** |
199 | | * Clear all elements from a hash set. |
200 | | * |
201 | | * This function also frees the elements if a free function was |
202 | | * provided at the creation of the hash set. |
203 | | * |
204 | | * @param set the hash set |
205 | | * @since GDAL 2.1 |
206 | | */ |
207 | | |
208 | | void CPLHashSetClear(CPLHashSet *set) |
209 | 0 | { |
210 | 0 | CPLHashSetClearInternal(set, false); |
211 | 0 | set->tabList = static_cast<CPLList **>( |
212 | 0 | CPLRealloc(set->tabList, sizeof(CPLList *) * 53)); |
213 | 0 | set->nIndiceAllocatedSize = 0; |
214 | 0 | set->nAllocatedSize = 53; |
215 | | #ifdef HASH_DEBUG |
216 | | set->nCollisions = 0; |
217 | | #endif |
218 | 0 | set->nSize = 0; |
219 | 0 | } |
220 | | |
221 | | /************************************************************************/ |
222 | | /* CPLHashSetForeach() */ |
223 | | /************************************************************************/ |
224 | | |
225 | | /** |
226 | | * Walk through the hash set and runs the provided function on all the |
227 | | * elements |
228 | | * |
229 | | * This function is provided the user_data argument of CPLHashSetForeach. |
230 | | * It must return TRUE to go on the walk through the hash set, or FALSE to |
231 | | * make it stop. |
232 | | * |
233 | | * Note : the structure of the hash set must *NOT* be modified during the |
234 | | * walk. |
235 | | * |
236 | | * @param set the hash set. |
237 | | * @param fnIterFunc the function called on each element. |
238 | | * @param user_data the user data provided to the function. |
239 | | */ |
240 | | |
241 | | void CPLHashSetForeach(CPLHashSet *set, CPLHashSetIterEltFunc fnIterFunc, |
242 | | void *user_data) |
243 | 0 | { |
244 | 0 | CPLAssert(set != nullptr); |
245 | 0 | if (!fnIterFunc) |
246 | 0 | return; |
247 | | |
248 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
249 | 0 | { |
250 | 0 | CPLList *cur = set->tabList[i]; |
251 | 0 | while (cur) |
252 | 0 | { |
253 | 0 | if (!fnIterFunc(cur->pData, user_data)) |
254 | 0 | return; |
255 | | |
256 | 0 | cur = cur->psNext; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | } |
260 | | |
261 | | /************************************************************************/ |
262 | | /* CPLHashSetRehash() */ |
263 | | /************************************************************************/ |
264 | | |
265 | | static void CPLHashSetRehash(CPLHashSet *set) |
266 | 0 | { |
267 | 0 | int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize]; |
268 | 0 | CPLList **newTabList = static_cast<CPLList **>( |
269 | 0 | CPLCalloc(sizeof(CPLList *), nNewAllocatedSize)); |
270 | | #ifdef HASH_DEBUG |
271 | | CPLDebug("CPLHASH", |
272 | | "hashSet=%p, nSize=%d, nCollisions=%d, " |
273 | | "fCollisionRate=%.02f", |
274 | | set, set->nSize, set->nCollisions, |
275 | | set->nCollisions * 100.0 / set->nSize); |
276 | | set->nCollisions = 0; |
277 | | #endif |
278 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
279 | 0 | { |
280 | 0 | CPLList *cur = set->tabList[i]; |
281 | 0 | while (cur) |
282 | 0 | { |
283 | 0 | const unsigned long nNewHashVal = |
284 | 0 | set->fnHashFunc(cur->pData) % nNewAllocatedSize; |
285 | | #ifdef HASH_DEBUG |
286 | | if (newTabList[nNewHashVal]) |
287 | | set->nCollisions++; |
288 | | #endif |
289 | 0 | CPLList *psNext = cur->psNext; |
290 | 0 | cur->psNext = newTabList[nNewHashVal]; |
291 | 0 | newTabList[nNewHashVal] = cur; |
292 | 0 | cur = psNext; |
293 | 0 | } |
294 | 0 | } |
295 | 0 | CPLFree(set->tabList); |
296 | 0 | set->tabList = newTabList; |
297 | 0 | set->nAllocatedSize = nNewAllocatedSize; |
298 | 0 | set->bRehash = false; |
299 | 0 | } |
300 | | |
301 | | /************************************************************************/ |
302 | | /* CPLHashSetFindPtr() */ |
303 | | /************************************************************************/ |
304 | | |
305 | | static void **CPLHashSetFindPtr(CPLHashSet *set, const void *elt) |
306 | 0 | { |
307 | 0 | const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize; |
308 | 0 | CPLList *cur = set->tabList[nHashVal]; |
309 | 0 | while (cur) |
310 | 0 | { |
311 | 0 | if (set->fnEqualFunc(cur->pData, elt)) |
312 | 0 | return &cur->pData; |
313 | 0 | cur = cur->psNext; |
314 | 0 | } |
315 | 0 | return nullptr; |
316 | 0 | } |
317 | | |
318 | | /************************************************************************/ |
319 | | /* CPLHashSetInsert() */ |
320 | | /************************************************************************/ |
321 | | |
322 | | /** |
323 | | * Inserts an element into a hash set. |
324 | | * |
325 | | * If the element was already inserted in the hash set, the previous |
326 | | * element is replaced by the new element. If a free function was provided, |
327 | | * it is used to free the previously inserted element |
328 | | * |
329 | | * @param set the hash set |
330 | | * @param elt the new element to insert in the hash set |
331 | | * |
332 | | * @return TRUE if the element was not already in the hash set |
333 | | */ |
334 | | |
335 | | int CPLHashSetInsert(CPLHashSet *set, void *elt) |
336 | 0 | { |
337 | 0 | CPLAssert(set != nullptr); |
338 | 0 | void **pElt = CPLHashSetFindPtr(set, elt); |
339 | 0 | if (pElt) |
340 | 0 | { |
341 | 0 | if (set->fnFreeEltFunc) |
342 | 0 | set->fnFreeEltFunc(*pElt); |
343 | |
|
344 | 0 | *pElt = elt; |
345 | 0 | return FALSE; |
346 | 0 | } |
347 | | |
348 | 0 | if (set->nSize >= 2 * set->nAllocatedSize / 3 || |
349 | 0 | (set->bRehash && set->nIndiceAllocatedSize > 0 && |
350 | 0 | set->nSize <= set->nAllocatedSize / 2)) |
351 | 0 | { |
352 | 0 | set->nIndiceAllocatedSize++; |
353 | 0 | CPLHashSetRehash(set); |
354 | 0 | } |
355 | |
|
356 | 0 | const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize; |
357 | | #ifdef HASH_DEBUG |
358 | | if (set->tabList[nHashVal]) |
359 | | set->nCollisions++; |
360 | | #endif |
361 | |
|
362 | 0 | CPLList *new_elt = CPLHashSetGetNewListElt(set); |
363 | 0 | new_elt->pData = elt; |
364 | 0 | new_elt->psNext = set->tabList[nHashVal]; |
365 | 0 | set->tabList[nHashVal] = new_elt; |
366 | 0 | set->nSize++; |
367 | |
|
368 | 0 | return TRUE; |
369 | 0 | } |
370 | | |
371 | | /************************************************************************/ |
372 | | /* CPLHashSetLookup() */ |
373 | | /************************************************************************/ |
374 | | |
375 | | /** |
376 | | * Returns the element found in the hash set corresponding to the element to |
377 | | * look up The element must not be modified. |
378 | | * |
379 | | * @param set the hash set |
380 | | * @param elt the element to look up in the hash set |
381 | | * |
382 | | * @return the element found in the hash set or NULL |
383 | | */ |
384 | | |
385 | | void *CPLHashSetLookup(CPLHashSet *set, const void *elt) |
386 | 0 | { |
387 | 0 | CPLAssert(set != nullptr); |
388 | 0 | void **pElt = CPLHashSetFindPtr(set, elt); |
389 | 0 | if (pElt) |
390 | 0 | return *pElt; |
391 | | |
392 | 0 | return nullptr; |
393 | 0 | } |
394 | | |
395 | | /************************************************************************/ |
396 | | /* CPLHashSetRemoveInternal() */ |
397 | | /************************************************************************/ |
398 | | |
399 | | static bool CPLHashSetRemoveInternal(CPLHashSet *set, const void *elt, |
400 | | bool bDeferRehash) |
401 | 0 | { |
402 | 0 | CPLAssert(set != nullptr); |
403 | 0 | if (set->nIndiceAllocatedSize > 0 && set->nSize <= set->nAllocatedSize / 2) |
404 | 0 | { |
405 | 0 | set->nIndiceAllocatedSize--; |
406 | 0 | if (bDeferRehash) |
407 | 0 | set->bRehash = true; |
408 | 0 | else |
409 | 0 | CPLHashSetRehash(set); |
410 | 0 | } |
411 | |
|
412 | 0 | int nHashVal = static_cast<int>(set->fnHashFunc(elt) % set->nAllocatedSize); |
413 | 0 | CPLList *cur = set->tabList[nHashVal]; |
414 | 0 | CPLList *prev = nullptr; |
415 | 0 | while (cur) |
416 | 0 | { |
417 | 0 | if (set->fnEqualFunc(cur->pData, elt)) |
418 | 0 | { |
419 | 0 | if (prev) |
420 | 0 | prev->psNext = cur->psNext; |
421 | 0 | else |
422 | 0 | set->tabList[nHashVal] = cur->psNext; |
423 | |
|
424 | 0 | if (set->fnFreeEltFunc) |
425 | 0 | set->fnFreeEltFunc(cur->pData); |
426 | |
|
427 | 0 | CPLHashSetReturnListElt(set, cur); |
428 | | #ifdef HASH_DEBUG |
429 | | if (set->tabList[nHashVal]) |
430 | | set->nCollisions--; |
431 | | #endif |
432 | 0 | set->nSize--; |
433 | 0 | return true; |
434 | 0 | } |
435 | 0 | prev = cur; |
436 | 0 | cur = cur->psNext; |
437 | 0 | } |
438 | 0 | return false; |
439 | 0 | } |
440 | | |
441 | | /************************************************************************/ |
442 | | /* CPLHashSetRemove() */ |
443 | | /************************************************************************/ |
444 | | |
445 | | /** |
446 | | * Removes an element from a hash set |
447 | | * |
448 | | * @param set the hash set |
449 | | * @param elt the new element to remove from the hash set |
450 | | * |
451 | | * @return TRUE if the element was in the hash set |
452 | | */ |
453 | | |
454 | | int CPLHashSetRemove(CPLHashSet *set, const void *elt) |
455 | 0 | { |
456 | 0 | return CPLHashSetRemoveInternal(set, elt, false); |
457 | 0 | } |
458 | | |
459 | | /************************************************************************/ |
460 | | /* CPLHashSetRemoveDeferRehash() */ |
461 | | /************************************************************************/ |
462 | | |
463 | | /** |
464 | | * Removes an element from a hash set. |
465 | | * |
466 | | * This will defer potential rehashing of the set to later calls to |
467 | | * CPLHashSetInsert() or CPLHashSetRemove(). |
468 | | * |
469 | | * @param set the hash set |
470 | | * @param elt the new element to remove from the hash set |
471 | | * |
472 | | * @return TRUE if the element was in the hash set |
473 | | * @since GDAL 2.1 |
474 | | */ |
475 | | |
476 | | int CPLHashSetRemoveDeferRehash(CPLHashSet *set, const void *elt) |
477 | 0 | { |
478 | 0 | return CPLHashSetRemoveInternal(set, elt, true); |
479 | 0 | } |
480 | | |
481 | | /************************************************************************/ |
482 | | /* CPLHashSetHashPointer() */ |
483 | | /************************************************************************/ |
484 | | |
485 | | /** |
486 | | * Hash function for an arbitrary pointer |
487 | | * |
488 | | * @param elt the arbitrary pointer to hash |
489 | | * |
490 | | * @return the hash value of the pointer |
491 | | */ |
492 | | |
493 | | unsigned long CPLHashSetHashPointer(const void *elt) |
494 | 0 | { |
495 | 0 | return static_cast<unsigned long>( |
496 | 0 | reinterpret_cast<GUIntptr_t>(const_cast<void *>(elt))); |
497 | 0 | } |
498 | | |
499 | | /************************************************************************/ |
500 | | /* CPLHashSetEqualPointer() */ |
501 | | /************************************************************************/ |
502 | | |
503 | | /** |
504 | | * Equality function for arbitrary pointers |
505 | | * |
506 | | * @param elt1 the first arbitrary pointer to compare |
507 | | * @param elt2 the second arbitrary pointer to compare |
508 | | * |
509 | | * @return TRUE if the pointers are equal |
510 | | */ |
511 | | |
512 | | int CPLHashSetEqualPointer(const void *elt1, const void *elt2) |
513 | 0 | { |
514 | 0 | return elt1 == elt2; |
515 | 0 | } |
516 | | |
517 | | /************************************************************************/ |
518 | | /* CPLHashSetHashStr() */ |
519 | | /************************************************************************/ |
520 | | |
521 | | /** |
522 | | * Hash function for a zero-terminated string |
523 | | * |
524 | | * @param elt the string to hash. May be NULL. |
525 | | * |
526 | | * @return the hash value of the string |
527 | | */ |
528 | | |
529 | | CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW |
530 | | unsigned long CPLHashSetHashStr(const void *elt) |
531 | 0 | { |
532 | 0 | if (elt == nullptr) |
533 | 0 | return 0; |
534 | | |
535 | 0 | const unsigned char *pszStr = static_cast<const unsigned char *>(elt); |
536 | 0 | unsigned long hash = 0; |
537 | |
|
538 | 0 | int c = 0; |
539 | 0 | while ((c = *pszStr++) != '\0') |
540 | 0 | hash = c + (hash << 6) + (hash << 16) - hash; |
541 | |
|
542 | 0 | return hash; |
543 | 0 | } |
544 | | |
545 | | /************************************************************************/ |
546 | | /* CPLHashSetEqualStr() */ |
547 | | /************************************************************************/ |
548 | | |
549 | | /** |
550 | | * Equality function for strings |
551 | | * |
552 | | * @param elt1 the first string to compare. May be NULL. |
553 | | * @param elt2 the second string to compare. May be NULL. |
554 | | * |
555 | | * @return TRUE if the strings are equal |
556 | | */ |
557 | | |
558 | | int CPLHashSetEqualStr(const void *elt1, const void *elt2) |
559 | 0 | { |
560 | 0 | const char *pszStr1 = static_cast<const char *>(elt1); |
561 | 0 | const char *pszStr2 = static_cast<const char *>(elt2); |
562 | |
|
563 | 0 | if (pszStr1 == nullptr && pszStr2 != nullptr) |
564 | 0 | return FALSE; |
565 | | |
566 | 0 | if (pszStr1 != nullptr && pszStr2 == nullptr) |
567 | 0 | return FALSE; |
568 | | |
569 | 0 | if (pszStr1 == nullptr && pszStr2 == nullptr) |
570 | 0 | return TRUE; |
571 | | |
572 | 0 | return strcmp(pszStr1, pszStr2) == 0; |
573 | 0 | } |