/src/gdal/port/cpl_hash_set.cpp
Line | Count | Source |
1 | | /********************************************************************** |
2 | | * |
3 | | * Name: cpl_hash_set.cpp |
4 | | * Project: CPL - Common Portability Library |
5 | | * Purpose: Hash set functions. |
6 | | * Author: Even Rouault, <even dot rouault at spatialys.com> |
7 | | * |
8 | | ********************************************************************** |
9 | | * Copyright (c) 2008-2009, Even Rouault <even dot rouault at spatialys.com> |
10 | | * |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #include "cpl_hash_set.h" |
15 | | |
16 | | #include <cstring> |
17 | | |
18 | | #include "cpl_conv.h" |
19 | | #include "cpl_error.h" |
20 | | #include "cpl_list.h" |
21 | | |
22 | | struct _CPLHashSet |
23 | | { |
24 | | CPLHashSetHashFunc fnHashFunc; |
25 | | CPLHashSetEqualFunc fnEqualFunc; |
26 | | CPLHashSetFreeEltFunc fnFreeEltFunc; |
27 | | CPLList **tabList; |
28 | | int nSize; |
29 | | int nIndiceAllocatedSize; |
30 | | int nAllocatedSize; |
31 | | CPLList *psRecyclingList; |
32 | | int nRecyclingListSize; |
33 | | bool bRehash; |
34 | | #ifdef HASH_DEBUG |
35 | | int nCollisions; |
36 | | #endif |
37 | | }; |
38 | | |
39 | | constexpr int anPrimes[] = { |
40 | | 53, 97, 193, 389, 769, 1543, 3079, |
41 | | 6151, 12289, 24593, 49157, 98317, 196613, 393241, |
42 | | 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, |
43 | | 100663319, 201326611, 402653189, 805306457, 1610612741}; |
44 | | |
45 | | /************************************************************************/ |
46 | | /* CPLHashSetNew() */ |
47 | | /************************************************************************/ |
48 | | |
49 | | /** |
50 | | * Creates a new hash set |
51 | | * |
52 | | * The hash function must return a hash value for the elements to insert. |
53 | | * If fnHashFunc is NULL, CPLHashSetHashPointer will be used. |
54 | | * |
55 | | * The equal function must return if two elements are equal. |
56 | | * If fnEqualFunc is NULL, CPLHashSetEqualPointer will be used. |
57 | | * |
58 | | * The free function is used to free elements inserted in the hash set, |
59 | | * when the hash set is destroyed, when elements are removed or replaced. |
60 | | * If fnFreeEltFunc is NULL, elements inserted into the hash set will not be |
61 | | * freed. |
62 | | * |
63 | | * @param fnHashFunc hash function. May be NULL. |
64 | | * @param fnEqualFunc equal function. May be NULL. |
65 | | * @param fnFreeEltFunc element free function. May be NULL. |
66 | | * |
67 | | * @return a new hash set |
68 | | */ |
69 | | |
70 | | CPLHashSet *CPLHashSetNew(CPLHashSetHashFunc fnHashFunc, |
71 | | CPLHashSetEqualFunc fnEqualFunc, |
72 | | CPLHashSetFreeEltFunc fnFreeEltFunc) |
73 | 0 | { |
74 | 0 | CPLHashSet *set = static_cast<CPLHashSet *>(CPLMalloc(sizeof(CPLHashSet))); |
75 | 0 | set->fnHashFunc = fnHashFunc ? fnHashFunc : CPLHashSetHashPointer; |
76 | 0 | set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : CPLHashSetEqualPointer; |
77 | 0 | set->fnFreeEltFunc = fnFreeEltFunc; |
78 | 0 | set->nSize = 0; |
79 | 0 | set->tabList = static_cast<CPLList **>(CPLCalloc(sizeof(CPLList *), 53)); |
80 | 0 | set->nIndiceAllocatedSize = 0; |
81 | 0 | set->nAllocatedSize = 53; |
82 | 0 | set->psRecyclingList = nullptr; |
83 | 0 | set->nRecyclingListSize = 0; |
84 | 0 | set->bRehash = false; |
85 | | #ifdef HASH_DEBUG |
86 | | set->nCollisions = 0; |
87 | | #endif |
88 | 0 | return set; |
89 | 0 | } |
90 | | |
91 | | /************************************************************************/ |
92 | | /* CPLHashSetSize() */ |
93 | | /************************************************************************/ |
94 | | |
95 | | /** |
96 | | * Returns the number of elements inserted in the hash set |
97 | | * |
98 | | * Note: this is not the internal size of the hash set |
99 | | * |
100 | | * @param set the hash set |
101 | | * |
102 | | * @return the number of elements in the hash set |
103 | | */ |
104 | | |
105 | | int CPLHashSetSize(const CPLHashSet *set) |
106 | 0 | { |
107 | 0 | CPLAssert(set != nullptr); |
108 | 0 | return set->nSize; |
109 | 0 | } |
110 | | |
111 | | /************************************************************************/ |
112 | | /* CPLHashSetGetNewListElt() */ |
113 | | /************************************************************************/ |
114 | | |
115 | | static CPLList *CPLHashSetGetNewListElt(CPLHashSet *set) |
116 | 0 | { |
117 | 0 | if (set->psRecyclingList) |
118 | 0 | { |
119 | 0 | CPLList *psRet = set->psRecyclingList; |
120 | 0 | psRet->pData = nullptr; |
121 | 0 | set->nRecyclingListSize--; |
122 | 0 | set->psRecyclingList = psRet->psNext; |
123 | 0 | return psRet; |
124 | 0 | } |
125 | | |
126 | 0 | return static_cast<CPLList *>(CPLMalloc(sizeof(CPLList))); |
127 | 0 | } |
128 | | |
129 | | /************************************************************************/ |
130 | | /* CPLHashSetReturnListElt() */ |
131 | | /************************************************************************/ |
132 | | |
133 | | static void CPLHashSetReturnListElt(CPLHashSet *set, CPLList *psList) |
134 | 0 | { |
135 | 0 | if (set->nRecyclingListSize < 128) |
136 | 0 | { |
137 | 0 | psList->psNext = set->psRecyclingList; |
138 | 0 | set->psRecyclingList = psList; |
139 | 0 | set->nRecyclingListSize++; |
140 | 0 | } |
141 | 0 | else |
142 | 0 | { |
143 | 0 | CPLFree(psList); |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | /************************************************************************/ |
148 | | /* CPLHashSetClearInternal() */ |
149 | | /************************************************************************/ |
150 | | |
151 | | static void CPLHashSetClearInternal(CPLHashSet *set, bool bFinalize) |
152 | 0 | { |
153 | 0 | CPLAssert(set != nullptr); |
154 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
155 | 0 | { |
156 | 0 | CPLList *cur = set->tabList[i]; |
157 | 0 | while (cur) |
158 | 0 | { |
159 | 0 | if (set->fnFreeEltFunc) |
160 | 0 | set->fnFreeEltFunc(cur->pData); |
161 | 0 | CPLList *psNext = cur->psNext; |
162 | 0 | if (bFinalize) |
163 | 0 | CPLFree(cur); |
164 | 0 | else |
165 | 0 | CPLHashSetReturnListElt(set, cur); |
166 | 0 | cur = psNext; |
167 | 0 | } |
168 | 0 | set->tabList[i] = nullptr; |
169 | 0 | } |
170 | 0 | set->bRehash = false; |
171 | 0 | } |
172 | | |
173 | | /************************************************************************/ |
174 | | /* CPLHashSetDestroy() */ |
175 | | /************************************************************************/ |
176 | | |
177 | | /** |
178 | | * Destroys an allocated hash set. |
179 | | * |
180 | | * This function also frees the elements if a free function was |
181 | | * provided at the creation of the hash set. |
182 | | * |
183 | | * @param set the hash set |
184 | | */ |
185 | | |
186 | | void CPLHashSetDestroy(CPLHashSet *set) |
187 | 0 | { |
188 | 0 | CPLHashSetClearInternal(set, true); |
189 | 0 | CPLFree(set->tabList); |
190 | 0 | CPLListDestroy(set->psRecyclingList); |
191 | 0 | CPLFree(set); |
192 | 0 | } |
193 | | |
194 | | /************************************************************************/ |
195 | | /* CPLHashSetClear() */ |
196 | | /************************************************************************/ |
197 | | |
198 | | /** |
199 | | * Clear all elements from a hash set. |
200 | | * |
201 | | * This function also frees the elements if a free function was |
202 | | * provided at the creation of the hash set. |
203 | | * |
204 | | * @param set the hash set |
205 | | */ |
206 | | |
207 | | void CPLHashSetClear(CPLHashSet *set) |
208 | 0 | { |
209 | 0 | CPLHashSetClearInternal(set, false); |
210 | 0 | set->tabList = static_cast<CPLList **>( |
211 | 0 | CPLRealloc(set->tabList, sizeof(CPLList *) * 53)); |
212 | 0 | set->nIndiceAllocatedSize = 0; |
213 | 0 | set->nAllocatedSize = 53; |
214 | | #ifdef HASH_DEBUG |
215 | | set->nCollisions = 0; |
216 | | #endif |
217 | 0 | set->nSize = 0; |
218 | 0 | } |
219 | | |
220 | | /************************************************************************/ |
221 | | /* CPLHashSetForeach() */ |
222 | | /************************************************************************/ |
223 | | |
224 | | /** |
225 | | * Walk through the hash set and runs the provided function on all the |
226 | | * elements |
227 | | * |
228 | | * This function is provided the user_data argument of CPLHashSetForeach. |
229 | | * It must return TRUE to go on the walk through the hash set, or FALSE to |
230 | | * make it stop. |
231 | | * |
232 | | * Note : the structure of the hash set must *NOT* be modified during the |
233 | | * walk. |
234 | | * |
235 | | * @param set the hash set. |
236 | | * @param fnIterFunc the function called on each element. |
237 | | * @param user_data the user data provided to the function. |
238 | | */ |
239 | | |
240 | | void CPLHashSetForeach(CPLHashSet *set, CPLHashSetIterEltFunc fnIterFunc, |
241 | | void *user_data) |
242 | 0 | { |
243 | 0 | CPLAssert(set != nullptr); |
244 | 0 | if (!fnIterFunc) |
245 | 0 | return; |
246 | | |
247 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
248 | 0 | { |
249 | 0 | CPLList *cur = set->tabList[i]; |
250 | 0 | while (cur) |
251 | 0 | { |
252 | 0 | if (!fnIterFunc(cur->pData, user_data)) |
253 | 0 | return; |
254 | | |
255 | 0 | cur = cur->psNext; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | } |
259 | | |
260 | | /************************************************************************/ |
261 | | /* CPLHashSetRehash() */ |
262 | | /************************************************************************/ |
263 | | |
264 | | static void CPLHashSetRehash(CPLHashSet *set) |
265 | 0 | { |
266 | 0 | int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize]; |
267 | 0 | CPLList **newTabList = static_cast<CPLList **>( |
268 | 0 | CPLCalloc(sizeof(CPLList *), nNewAllocatedSize)); |
269 | | #ifdef HASH_DEBUG |
270 | | CPLDebug("CPLHASH", |
271 | | "hashSet=%p, nSize=%d, nCollisions=%d, " |
272 | | "fCollisionRate=%.02f", |
273 | | set, set->nSize, set->nCollisions, |
274 | | set->nCollisions * 100.0 / set->nSize); |
275 | | set->nCollisions = 0; |
276 | | #endif |
277 | 0 | for (int i = 0; i < set->nAllocatedSize; i++) |
278 | 0 | { |
279 | 0 | CPLList *cur = set->tabList[i]; |
280 | 0 | while (cur) |
281 | 0 | { |
282 | 0 | const unsigned long nNewHashVal = |
283 | 0 | set->fnHashFunc(cur->pData) % nNewAllocatedSize; |
284 | | #ifdef HASH_DEBUG |
285 | | if (newTabList[nNewHashVal]) |
286 | | set->nCollisions++; |
287 | | #endif |
288 | 0 | CPLList *psNext = cur->psNext; |
289 | 0 | cur->psNext = newTabList[nNewHashVal]; |
290 | 0 | newTabList[nNewHashVal] = cur; |
291 | 0 | cur = psNext; |
292 | 0 | } |
293 | 0 | } |
294 | 0 | CPLFree(set->tabList); |
295 | 0 | set->tabList = newTabList; |
296 | 0 | set->nAllocatedSize = nNewAllocatedSize; |
297 | 0 | set->bRehash = false; |
298 | 0 | } |
299 | | |
300 | | /************************************************************************/ |
301 | | /* CPLHashSetFindPtr() */ |
302 | | /************************************************************************/ |
303 | | |
304 | | static void **CPLHashSetFindPtr(CPLHashSet *set, const void *elt) |
305 | 0 | { |
306 | 0 | const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize; |
307 | 0 | CPLList *cur = set->tabList[nHashVal]; |
308 | 0 | while (cur) |
309 | 0 | { |
310 | 0 | if (set->fnEqualFunc(cur->pData, elt)) |
311 | 0 | return &cur->pData; |
312 | 0 | cur = cur->psNext; |
313 | 0 | } |
314 | 0 | return nullptr; |
315 | 0 | } |
316 | | |
317 | | /************************************************************************/ |
318 | | /* CPLHashSetInsert() */ |
319 | | /************************************************************************/ |
320 | | |
321 | | /** |
322 | | * Inserts an element into a hash set. |
323 | | * |
324 | | * If the element was already inserted in the hash set, the previous |
325 | | * element is replaced by the new element. If a free function was provided, |
326 | | * it is used to free the previously inserted element |
327 | | * |
328 | | * @param set the hash set |
329 | | * @param elt the new element to insert in the hash set |
330 | | * |
331 | | * @return TRUE if the element was not already in the hash set |
332 | | */ |
333 | | |
334 | | int CPLHashSetInsert(CPLHashSet *set, void *elt) |
335 | 0 | { |
336 | 0 | CPLAssert(set != nullptr); |
337 | 0 | void **pElt = CPLHashSetFindPtr(set, elt); |
338 | 0 | if (pElt) |
339 | 0 | { |
340 | 0 | if (set->fnFreeEltFunc) |
341 | 0 | set->fnFreeEltFunc(*pElt); |
342 | |
|
343 | 0 | *pElt = elt; |
344 | 0 | return FALSE; |
345 | 0 | } |
346 | | |
347 | 0 | if (set->nSize >= 2 * set->nAllocatedSize / 3 || |
348 | 0 | (set->bRehash && set->nIndiceAllocatedSize > 0 && |
349 | 0 | set->nSize <= set->nAllocatedSize / 2)) |
350 | 0 | { |
351 | 0 | set->nIndiceAllocatedSize++; |
352 | 0 | CPLHashSetRehash(set); |
353 | 0 | } |
354 | |
|
355 | 0 | const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize; |
356 | | #ifdef HASH_DEBUG |
357 | | if (set->tabList[nHashVal]) |
358 | | set->nCollisions++; |
359 | | #endif |
360 | |
|
361 | 0 | CPLList *new_elt = CPLHashSetGetNewListElt(set); |
362 | 0 | new_elt->pData = elt; |
363 | 0 | new_elt->psNext = set->tabList[nHashVal]; |
364 | 0 | set->tabList[nHashVal] = new_elt; |
365 | 0 | set->nSize++; |
366 | |
|
367 | 0 | return TRUE; |
368 | 0 | } |
369 | | |
370 | | /************************************************************************/ |
371 | | /* CPLHashSetLookup() */ |
372 | | /************************************************************************/ |
373 | | |
374 | | /** |
375 | | * Returns the element found in the hash set corresponding to the element to |
376 | | * look up The element must not be modified. |
377 | | * |
378 | | * @param set the hash set |
379 | | * @param elt the element to look up in the hash set |
380 | | * |
381 | | * @return the element found in the hash set or NULL |
382 | | */ |
383 | | |
384 | | void *CPLHashSetLookup(CPLHashSet *set, const void *elt) |
385 | 0 | { |
386 | 0 | CPLAssert(set != nullptr); |
387 | 0 | void **pElt = CPLHashSetFindPtr(set, elt); |
388 | 0 | if (pElt) |
389 | 0 | return *pElt; |
390 | | |
391 | 0 | return nullptr; |
392 | 0 | } |
393 | | |
394 | | /************************************************************************/ |
395 | | /* CPLHashSetRemoveInternal() */ |
396 | | /************************************************************************/ |
397 | | |
398 | | static bool CPLHashSetRemoveInternal(CPLHashSet *set, const void *elt, |
399 | | bool bDeferRehash) |
400 | 0 | { |
401 | 0 | CPLAssert(set != nullptr); |
402 | 0 | if (set->nIndiceAllocatedSize > 0 && set->nSize <= set->nAllocatedSize / 2) |
403 | 0 | { |
404 | 0 | set->nIndiceAllocatedSize--; |
405 | 0 | if (bDeferRehash) |
406 | 0 | set->bRehash = true; |
407 | 0 | else |
408 | 0 | CPLHashSetRehash(set); |
409 | 0 | } |
410 | |
|
411 | 0 | int nHashVal = static_cast<int>(set->fnHashFunc(elt) % set->nAllocatedSize); |
412 | 0 | CPLList *cur = set->tabList[nHashVal]; |
413 | 0 | CPLList *prev = nullptr; |
414 | 0 | while (cur) |
415 | 0 | { |
416 | 0 | if (set->fnEqualFunc(cur->pData, elt)) |
417 | 0 | { |
418 | 0 | if (prev) |
419 | 0 | prev->psNext = cur->psNext; |
420 | 0 | else |
421 | 0 | set->tabList[nHashVal] = cur->psNext; |
422 | |
|
423 | 0 | if (set->fnFreeEltFunc) |
424 | 0 | set->fnFreeEltFunc(cur->pData); |
425 | |
|
426 | 0 | CPLHashSetReturnListElt(set, cur); |
427 | | #ifdef HASH_DEBUG |
428 | | if (set->tabList[nHashVal]) |
429 | | set->nCollisions--; |
430 | | #endif |
431 | 0 | set->nSize--; |
432 | 0 | return true; |
433 | 0 | } |
434 | 0 | prev = cur; |
435 | 0 | cur = cur->psNext; |
436 | 0 | } |
437 | 0 | return false; |
438 | 0 | } |
439 | | |
440 | | /************************************************************************/ |
441 | | /* CPLHashSetRemove() */ |
442 | | /************************************************************************/ |
443 | | |
444 | | /** |
445 | | * Removes an element from a hash set |
446 | | * |
447 | | * @param set the hash set |
448 | | * @param elt the new element to remove from the hash set |
449 | | * |
450 | | * @return TRUE if the element was in the hash set |
451 | | */ |
452 | | |
453 | | int CPLHashSetRemove(CPLHashSet *set, const void *elt) |
454 | 0 | { |
455 | 0 | return CPLHashSetRemoveInternal(set, elt, false); |
456 | 0 | } |
457 | | |
458 | | /************************************************************************/ |
459 | | /* CPLHashSetRemoveDeferRehash() */ |
460 | | /************************************************************************/ |
461 | | |
462 | | /** |
463 | | * Removes an element from a hash set. |
464 | | * |
465 | | * This will defer potential rehashing of the set to later calls to |
466 | | * CPLHashSetInsert() or CPLHashSetRemove(). |
467 | | * |
468 | | * @param set the hash set |
469 | | * @param elt the new element to remove from the hash set |
470 | | * |
471 | | * @return TRUE if the element was in the hash set |
472 | | */ |
473 | | |
474 | | int CPLHashSetRemoveDeferRehash(CPLHashSet *set, const void *elt) |
475 | 0 | { |
476 | 0 | return CPLHashSetRemoveInternal(set, elt, true); |
477 | 0 | } |
478 | | |
479 | | /************************************************************************/ |
480 | | /* CPLHashSetHashPointer() */ |
481 | | /************************************************************************/ |
482 | | |
483 | | /** |
484 | | * Hash function for an arbitrary pointer |
485 | | * |
486 | | * @param elt the arbitrary pointer to hash |
487 | | * |
488 | | * @return the hash value of the pointer |
489 | | */ |
490 | | |
491 | | unsigned long CPLHashSetHashPointer(const void *elt) |
492 | 0 | { |
493 | 0 | return static_cast<unsigned long>( |
494 | 0 | reinterpret_cast<GUIntptr_t>(const_cast<void *>(elt))); |
495 | 0 | } |
496 | | |
497 | | /************************************************************************/ |
498 | | /* CPLHashSetEqualPointer() */ |
499 | | /************************************************************************/ |
500 | | |
501 | | /** |
502 | | * Equality function for arbitrary pointers |
503 | | * |
504 | | * @param elt1 the first arbitrary pointer to compare |
505 | | * @param elt2 the second arbitrary pointer to compare |
506 | | * |
507 | | * @return TRUE if the pointers are equal |
508 | | */ |
509 | | |
510 | | int CPLHashSetEqualPointer(const void *elt1, const void *elt2) |
511 | 0 | { |
512 | 0 | return elt1 == elt2; |
513 | 0 | } |
514 | | |
515 | | /************************************************************************/ |
516 | | /* CPLHashSetHashStr() */ |
517 | | /************************************************************************/ |
518 | | |
519 | | /** |
520 | | * Hash function for a zero-terminated string |
521 | | * |
522 | | * @param elt the string to hash. May be NULL. |
523 | | * |
524 | | * @return the hash value of the string |
525 | | */ |
526 | | |
527 | | CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW |
528 | | unsigned long CPLHashSetHashStr(const void *elt) |
529 | 0 | { |
530 | 0 | if (elt == nullptr) |
531 | 0 | return 0; |
532 | | |
533 | 0 | const unsigned char *pszStr = static_cast<const unsigned char *>(elt); |
534 | 0 | unsigned long hash = 0; |
535 | |
|
536 | 0 | int c = 0; |
537 | 0 | while ((c = *pszStr++) != '\0') |
538 | 0 | hash = c + (hash << 6) + (hash << 16) - hash; |
539 | |
|
540 | 0 | return hash; |
541 | 0 | } |
542 | | |
543 | | /************************************************************************/ |
544 | | /* CPLHashSetEqualStr() */ |
545 | | /************************************************************************/ |
546 | | |
547 | | /** |
548 | | * Equality function for strings |
549 | | * |
550 | | * @param elt1 the first string to compare. May be NULL. |
551 | | * @param elt2 the second string to compare. May be NULL. |
552 | | * |
553 | | * @return TRUE if the strings are equal |
554 | | */ |
555 | | |
556 | | int CPLHashSetEqualStr(const void *elt1, const void *elt2) |
557 | 0 | { |
558 | 0 | const char *pszStr1 = static_cast<const char *>(elt1); |
559 | 0 | const char *pszStr2 = static_cast<const char *>(elt2); |
560 | |
|
561 | 0 | if (pszStr1 == nullptr && pszStr2 != nullptr) |
562 | 0 | return FALSE; |
563 | | |
564 | 0 | if (pszStr1 != nullptr && pszStr2 == nullptr) |
565 | 0 | return FALSE; |
566 | | |
567 | 0 | if (pszStr1 == nullptr && pszStr2 == nullptr) |
568 | 0 | return TRUE; |
569 | | |
570 | 0 | return strcmp(pszStr1, pszStr2) == 0; |
571 | 0 | } |