/src/tdengine/source/util/src/tsimplehash.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com> |
3 | | * |
4 | | * This program is free software: you can use, redistribute, and/or modify |
5 | | * it under the terms of the GNU Affero General Public License, version 3 |
6 | | * or later ("AGPL"), as published by the Free Software Foundation. |
7 | | * |
8 | | * This program is distributed in the hope that it will be useful, but WITHOUT |
9 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
10 | | * FITNESS FOR A PARTICULAR PURPOSE. |
11 | | * |
12 | | * You should have received a copy of the GNU Affero General Public License |
13 | | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
14 | | */ |
15 | | |
16 | | #include "tsimplehash.h" |
17 | | #include "taoserror.h" |
18 | | #include "tlog.h" |
19 | | #include "tdef.h" |
20 | | |
21 | | #define DEFAULT_BUF_PAGE_SIZE 1024 |
22 | 0 | #define SHASH_DEFAULT_LOAD_FACTOR 0.75 |
23 | 0 | #define HASH_MAX_CAPACITY (1024 * 1024 * 16L) |
24 | 0 | #define SHASH_NEED_RESIZE(_h) ((_h)->size >= (_h)->capacity * SHASH_DEFAULT_LOAD_FACTOR) |
25 | | |
26 | 0 | #define GET_SHASH_NODE_DATA(_n) (((SHNode*)_n)->data) |
27 | 0 | #define GET_SHASH_NODE_KEY(_n, _dl) ((char*)GET_SHASH_NODE_DATA(_n) + (_dl)) |
28 | | |
29 | 0 | #define HASH_INDEX(v, c) ((v) & ((c)-1)) |
30 | | |
31 | | #define FREE_HASH_NODE(_n, fp) \ |
32 | 0 | do { \ |
33 | 0 | if (fp) { \ |
34 | 0 | fp((_n)->data); \ |
35 | 0 | } \ |
36 | 0 | taosMemoryFreeClear(_n); \ |
37 | 0 | } while (0); |
38 | | |
39 | | struct SSHashObj { |
40 | | SHNode **hashList; |
41 | | size_t capacity; // number of slots |
42 | | int64_t size; // number of elements in hash table |
43 | | _hash_fn_t hashFp; // hash function |
44 | | _equal_fn_t equalFp; // equal function |
45 | | _hash_free_fn_t freeFp; // free function |
46 | | SArray *pHashNodeBuf; // hash node allocation buffer, 1k size of each page by default |
47 | | int32_t offset; // allocation offset in current page |
48 | | }; |
49 | | |
50 | 0 | static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { |
51 | 0 | int32_t len = (length < HASH_MAX_CAPACITY ? length : HASH_MAX_CAPACITY); |
52 | |
|
53 | 0 | int32_t i = 4; |
54 | | // while ((i * SHASH_DEFAULT_LOAD_FACTOR) < len) i = (i << 1u); |
55 | 0 | while (i < len) i = (i << 1u); |
56 | 0 | return i; |
57 | 0 | } |
58 | | |
59 | 0 | SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { |
60 | 0 | if (fn == NULL) { |
61 | 0 | return NULL; |
62 | 0 | } |
63 | | |
64 | 0 | if (capacity == 0) { |
65 | 0 | capacity = 4; |
66 | 0 | } |
67 | |
|
68 | 0 | SSHashObj *pHashObj = (SSHashObj *)taosMemoryMalloc(sizeof(SSHashObj)); |
69 | 0 | if (!pHashObj) { |
70 | 0 | return NULL; |
71 | 0 | } |
72 | | |
73 | | // the max slots is not defined by user |
74 | 0 | pHashObj->hashFp = fn; |
75 | 0 | pHashObj->capacity = taosHashCapacity((int32_t)capacity); |
76 | 0 | pHashObj->equalFp = memcmp; |
77 | |
|
78 | 0 | pHashObj->freeFp = NULL; |
79 | 0 | pHashObj->offset = 0; |
80 | 0 | pHashObj->size = 0; |
81 | | |
82 | 0 | pHashObj->hashList = (SHNode **)taosMemoryCalloc(pHashObj->capacity, sizeof(void *)); |
83 | 0 | if (!pHashObj->hashList) { |
84 | 0 | taosMemoryFree(pHashObj); |
85 | 0 | return NULL; |
86 | 0 | } |
87 | | |
88 | 0 | return pHashObj; |
89 | 0 | } |
90 | | |
91 | 0 | int32_t tSimpleHashGetSize(const SSHashObj *pHashObj) { |
92 | 0 | if (!pHashObj) { |
93 | 0 | return 0; |
94 | 0 | } |
95 | 0 | return (int32_t) pHashObj->size; |
96 | 0 | } |
97 | | |
98 | 0 | void tSimpleHashSetFreeFp(SSHashObj* pHashObj, _hash_free_fn_t freeFp) { |
99 | 0 | pHashObj->freeFp = freeFp; |
100 | 0 | } |
101 | | |
102 | 0 | static void* doInternalAlloc(SSHashObj* pHashObj, int32_t size) { |
103 | | #if 0 |
104 | | void** p = taosArrayGetLast(pHashObj->pHashNodeBuf); |
105 | | if (p == NULL || (pHashObj->offset + size) > DEFAULT_BUF_PAGE_SIZE) { |
106 | | // let's allocate one new page |
107 | | int32_t allocSize = TMAX(size, DEFAULT_BUF_PAGE_SIZE); |
108 | | void* pNewPage = taosMemoryMalloc(allocSize); |
109 | | if (pNewPage == NULL) { |
110 | | return NULL; |
111 | | } |
112 | | |
113 | | // if the allocate the buffer page is greater than the DFFAULT_BUF_PAGE_SIZE, |
114 | | // pHashObj->offset will always be greater than DEFAULT_BUF_PAGE_SIZE, which means that |
115 | | // current buffer page is full. And a new buffer page needs to be allocated. |
116 | | pHashObj->offset = size; |
117 | | taosArrayPush(pHashObj->pHashNodeBuf, &pNewPage); |
118 | | return pNewPage; |
119 | | } else { |
120 | | void* pPos = (char*)(*p) + pHashObj->offset; |
121 | | pHashObj->offset += size; |
122 | | return pPos; |
123 | | } |
124 | | #else |
125 | 0 | return taosMemoryMalloc(size); |
126 | 0 | #endif |
127 | 0 | } |
128 | | |
129 | | static SHNode *doCreateHashNode(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen, |
130 | 0 | uint32_t hashVal) { |
131 | 0 | SHNode *pNewNode = doInternalAlloc(pHashObj, sizeof(SHNode) + keyLen + dataLen); |
132 | 0 | if (!pNewNode) { |
133 | 0 | return NULL; |
134 | 0 | } |
135 | | |
136 | 0 | pNewNode->keyLen = keyLen; |
137 | 0 | pNewNode->dataLen = dataLen; |
138 | 0 | pNewNode->next = NULL; |
139 | 0 | pNewNode->hashVal = hashVal; |
140 | |
|
141 | 0 | if (data) { |
142 | 0 | memcpy(GET_SHASH_NODE_DATA(pNewNode), data, dataLen); |
143 | 0 | } |
144 | |
|
145 | 0 | memcpy(GET_SHASH_NODE_KEY(pNewNode, dataLen), key, keyLen); |
146 | 0 | return pNewNode; |
147 | 0 | } |
148 | | |
149 | 0 | static int32_t tSimpleHashTableResize(SSHashObj *pHashObj) { |
150 | 0 | int32_t code = 0; |
151 | 0 | if (!SHASH_NEED_RESIZE(pHashObj)) { |
152 | 0 | return code; |
153 | 0 | } |
154 | | |
155 | 0 | int32_t newCapacity = (int32_t)(pHashObj->capacity << 1u); |
156 | 0 | if (newCapacity > HASH_MAX_CAPACITY) { |
157 | 0 | uDebug("current capacity:%" PRIzu ", maximum capacity:%" PRId32 ", no resize applied due to limitation is reached", |
158 | 0 | pHashObj->capacity, (int32_t)HASH_MAX_CAPACITY); |
159 | 0 | return code; |
160 | 0 | } |
161 | | |
162 | | // int64_t st = taosGetTimestampUs(); |
163 | 0 | void *pNewEntryList = taosMemoryRealloc(pHashObj->hashList, POINTER_BYTES * newCapacity); |
164 | 0 | if (!pNewEntryList) { |
165 | 0 | uWarn("hash resize failed due to out of memory, capacity remain:%zu", pHashObj->capacity); |
166 | 0 | return terrno; |
167 | 0 | } |
168 | | |
169 | 0 | size_t inc = newCapacity - pHashObj->capacity; |
170 | 0 | memset((char *)pNewEntryList + pHashObj->capacity * POINTER_BYTES, 0, inc * sizeof(void *)); |
171 | |
|
172 | 0 | pHashObj->hashList = pNewEntryList; |
173 | 0 | pHashObj->capacity = newCapacity; |
174 | |
|
175 | 0 | for (int32_t idx = 0; idx < pHashObj->capacity; ++idx) { |
176 | 0 | SHNode *pNode = pHashObj->hashList[idx]; |
177 | 0 | if (!pNode) { |
178 | 0 | continue; |
179 | 0 | } |
180 | | |
181 | 0 | SHNode *pNext = NULL; |
182 | 0 | SHNode *pPrev = NULL; |
183 | |
|
184 | 0 | while (pNode != NULL) { |
185 | 0 | int32_t newIdx = HASH_INDEX(pNode->hashVal, pHashObj->capacity); |
186 | 0 | pNext = pNode->next; |
187 | 0 | if (newIdx != idx) { |
188 | 0 | if (!pPrev) { |
189 | 0 | pHashObj->hashList[idx] = pNext; |
190 | 0 | } else { |
191 | 0 | pPrev->next = pNext; |
192 | 0 | } |
193 | |
|
194 | 0 | pNode->next = pHashObj->hashList[newIdx]; |
195 | 0 | pHashObj->hashList[newIdx] = pNode; |
196 | 0 | } else { |
197 | 0 | pPrev = pNode; |
198 | 0 | } |
199 | |
|
200 | 0 | pNode = pNext; |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | | // int64_t et = taosGetTimestampUs(); |
205 | | // uDebug("hash table resize completed, new capacity:%d, load factor:%f, elapsed time:%fms", |
206 | | // (int32_t)pHashObj->capacity, |
207 | | // ((double)pHashObj->size) / pHashObj->capacity, (et - st) / 1000.0); |
208 | 0 | return code; |
209 | 0 | } |
210 | | |
211 | 0 | int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen) { |
212 | 0 | if (!pHashObj || !key) { |
213 | 0 | return TSDB_CODE_INVALID_PARA; |
214 | 0 | } |
215 | | |
216 | 0 | uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); |
217 | | |
218 | | // need the resize process, write lock applied |
219 | 0 | if (SHASH_NEED_RESIZE(pHashObj)) { |
220 | 0 | int32_t code = tSimpleHashTableResize(pHashObj); |
221 | 0 | if (TSDB_CODE_SUCCESS != code) { |
222 | 0 | return code; |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | 0 | int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); |
227 | |
|
228 | 0 | SHNode *pNode = pHashObj->hashList[slot]; |
229 | 0 | if (!pNode) { |
230 | 0 | SHNode *pNewNode = doCreateHashNode(pHashObj, key, keyLen, data, dataLen, hashVal); |
231 | 0 | if (!pNewNode) { |
232 | 0 | return terrno; |
233 | 0 | } |
234 | | |
235 | 0 | pHashObj->hashList[slot] = pNewNode; |
236 | 0 | pHashObj->size += 1; |
237 | 0 | return 0; |
238 | 0 | } |
239 | | |
240 | 0 | while (pNode) { |
241 | 0 | if ((keyLen == pNode->keyLen) && (*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { |
242 | 0 | break; |
243 | 0 | } |
244 | 0 | pNode = pNode->next; |
245 | 0 | } |
246 | |
|
247 | 0 | if (!pNode) { |
248 | 0 | SHNode *pNewNode = doCreateHashNode(pHashObj, key, keyLen, data, dataLen, hashVal); |
249 | 0 | if (!pNewNode) { |
250 | 0 | return terrno; |
251 | 0 | } |
252 | 0 | pNewNode->next = pHashObj->hashList[slot]; |
253 | 0 | pHashObj->hashList[slot] = pNewNode; |
254 | 0 | pHashObj->size += 1; |
255 | 0 | } else if (data) { // update data |
256 | 0 | memcpy(GET_SHASH_NODE_DATA(pNode), data, dataLen); |
257 | 0 | } |
258 | | |
259 | 0 | return 0; |
260 | 0 | } |
261 | | |
262 | 0 | static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void *key, size_t keyLen, int32_t index) { |
263 | 0 | SHNode *pNode = pHashObj->hashList[index]; |
264 | 0 | while (pNode) { |
265 | 0 | const char *p = GET_SHASH_NODE_KEY(pNode, pNode->dataLen); |
266 | |
|
267 | 0 | if (pNode->keyLen == keyLen && ((*(pHashObj->equalFp))(p, key, keyLen) == 0)) { |
268 | 0 | break; |
269 | 0 | } |
270 | 0 | pNode = pNode->next; |
271 | 0 | } |
272 | |
|
273 | 0 | return pNode; |
274 | 0 | } |
275 | | |
276 | 0 | static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) { return tSimpleHashGetSize(pHashObj) == 0; } |
277 | | |
278 | 0 | void *tSimpleHashGet(SSHashObj *pHashObj, const void *key, size_t keyLen) { |
279 | 0 | if (!pHashObj || taosHashTableEmpty(pHashObj) || !key) { |
280 | 0 | return NULL; |
281 | 0 | } |
282 | | |
283 | 0 | uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); |
284 | |
|
285 | 0 | int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); |
286 | 0 | SHNode *pNode = pHashObj->hashList[slot]; |
287 | 0 | if (!pNode) { |
288 | 0 | return NULL; |
289 | 0 | } |
290 | | |
291 | 0 | char *data = NULL; |
292 | 0 | pNode = doSearchInEntryList(pHashObj, key, keyLen, slot); |
293 | 0 | if (pNode != NULL) { |
294 | 0 | data = GET_SHASH_NODE_DATA(pNode); |
295 | 0 | } |
296 | |
|
297 | 0 | return data; |
298 | 0 | } |
299 | | |
300 | 0 | int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key, size_t keyLen) { |
301 | 0 | int32_t code = TSDB_CODE_INVALID_PARA; |
302 | 0 | if (!pHashObj || !key) { |
303 | 0 | return code; |
304 | 0 | } |
305 | | |
306 | 0 | uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); |
307 | |
|
308 | 0 | int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); |
309 | |
|
310 | 0 | SHNode *pNode = pHashObj->hashList[slot]; |
311 | 0 | SHNode *pPrev = NULL; |
312 | 0 | while (pNode) { |
313 | 0 | if ((keyLen == pNode->keyLen) && (*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { |
314 | 0 | if (!pPrev) { |
315 | 0 | pHashObj->hashList[slot] = pNode->next; |
316 | 0 | } else { |
317 | 0 | pPrev->next = pNode->next; |
318 | 0 | } |
319 | |
|
320 | 0 | FREE_HASH_NODE(pNode, pHashObj->freeFp); |
321 | 0 | pHashObj->size -= 1; |
322 | 0 | code = TSDB_CODE_SUCCESS; |
323 | 0 | break; |
324 | 0 | } |
325 | 0 | pPrev = pNode; |
326 | 0 | pNode = pNode->next; |
327 | 0 | } |
328 | |
|
329 | 0 | return code; |
330 | 0 | } |
331 | | |
332 | 0 | int32_t tSimpleHashIterateRemove(SSHashObj *pHashObj, const void *key, size_t keyLen, void **pIter, int32_t *iter) { |
333 | 0 | if (!pHashObj || !key) { |
334 | 0 | return TSDB_CODE_FAILED; |
335 | 0 | } |
336 | | |
337 | 0 | uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); |
338 | |
|
339 | 0 | int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); |
340 | |
|
341 | 0 | SHNode *pNode = pHashObj->hashList[slot]; |
342 | 0 | SHNode *pPrev = NULL; |
343 | 0 | while (pNode) { |
344 | 0 | if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) { |
345 | 0 | if (!pPrev) { |
346 | 0 | pHashObj->hashList[slot] = pNode->next; |
347 | 0 | } else { |
348 | 0 | pPrev->next = pNode->next; |
349 | 0 | } |
350 | |
|
351 | 0 | if (*pIter == (void *)GET_SHASH_NODE_DATA(pNode)) { |
352 | 0 | *pIter = pPrev ? GET_SHASH_NODE_DATA(pPrev) : NULL; |
353 | 0 | } |
354 | |
|
355 | 0 | FREE_HASH_NODE(pNode, pHashObj->freeFp); |
356 | 0 | pHashObj->size -= 1; |
357 | 0 | break; |
358 | 0 | } |
359 | 0 | pPrev = pNode; |
360 | 0 | pNode = pNode->next; |
361 | 0 | } |
362 | |
|
363 | 0 | return TSDB_CODE_SUCCESS; |
364 | 0 | } |
365 | | |
366 | 0 | void tSimpleHashClear(SSHashObj *pHashObj) { |
367 | 0 | if (!pHashObj || taosHashTableEmpty(pHashObj)) { |
368 | 0 | return; |
369 | 0 | } |
370 | | |
371 | 0 | SHNode *pNode = NULL, *pNext = NULL; |
372 | 0 | for (int32_t i = 0; i < pHashObj->capacity; ++i) { |
373 | 0 | pNode = pHashObj->hashList[i]; |
374 | 0 | if (!pNode) { |
375 | 0 | continue; |
376 | 0 | } |
377 | | |
378 | 0 | while (pNode) { |
379 | 0 | pNext = pNode->next; |
380 | 0 | FREE_HASH_NODE(pNode, pHashObj->freeFp); |
381 | 0 | pNode = pNext; |
382 | 0 | } |
383 | |
|
384 | 0 | pHashObj->hashList[i] = NULL; |
385 | 0 | } |
386 | |
|
387 | 0 | pHashObj->offset = 0; |
388 | 0 | pHashObj->size = 0; |
389 | 0 | } |
390 | | |
391 | 0 | void tSimpleHashCleanup(SSHashObj *pHashObj) { |
392 | 0 | if (!pHashObj) { |
393 | 0 | return; |
394 | 0 | } |
395 | | |
396 | 0 | tSimpleHashClear(pHashObj); |
397 | 0 | taosMemoryFreeClear(pHashObj->hashList); |
398 | 0 | taosMemoryFree(pHashObj); |
399 | 0 | } |
400 | | |
401 | 0 | size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj) { |
402 | 0 | if (!pHashObj) { |
403 | 0 | return 0; |
404 | 0 | } |
405 | | |
406 | 0 | return (pHashObj->capacity * sizeof(void *)) + sizeof(SHNode) * tSimpleHashGetSize(pHashObj) + sizeof(SSHashObj); |
407 | 0 | } |
408 | | |
409 | 0 | void *tSimpleHashIterate(const SSHashObj *pHashObj, void *data, int32_t *iter) { |
410 | 0 | if (!pHashObj) { |
411 | 0 | return NULL; |
412 | 0 | } |
413 | | |
414 | 0 | SHNode *pNode = NULL; |
415 | |
|
416 | 0 | if (!data) { |
417 | 0 | for (int32_t i = *iter; i < pHashObj->capacity; ++i) { |
418 | 0 | pNode = pHashObj->hashList[i]; |
419 | 0 | if (!pNode) { |
420 | 0 | continue; |
421 | 0 | } |
422 | 0 | *iter = i; |
423 | 0 | return GET_SHASH_NODE_DATA(pNode); |
424 | 0 | } |
425 | 0 | return NULL; |
426 | 0 | } |
427 | | |
428 | 0 | pNode = (SHNode *)((char *)data - offsetof(SHNode, data)); |
429 | |
|
430 | 0 | if (pNode->next) { |
431 | 0 | return GET_SHASH_NODE_DATA(pNode->next); |
432 | 0 | } |
433 | | |
434 | 0 | ++(*iter); |
435 | 0 | for (int32_t i = *iter; i < pHashObj->capacity; ++i) { |
436 | 0 | pNode = pHashObj->hashList[i]; |
437 | 0 | if (!pNode) { |
438 | 0 | continue; |
439 | 0 | } |
440 | 0 | *iter = i; |
441 | 0 | return GET_SHASH_NODE_DATA(pNode); |
442 | 0 | } |
443 | | |
444 | 0 | return NULL; |
445 | 0 | } |