/src/fluent-bit/lib/onigmo/st.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* This is a public domain general purpose hash table package |
2 | | originally written by Peter Moore @ UCB. |
3 | | |
4 | | The hash table data structures were redesigned and the package was |
5 | | rewritten by Vladimir Makarov <vmakarov@redhat.com>. */ |
6 | | |
7 | | /* The original package implemented classic bucket-based hash tables |
8 | | with entries doubly linked for an access by their insertion order. |
9 | | To decrease pointer chasing and as a consequence to improve a data |
10 | | locality the current implementation is based on storing entries in |
11 | | an array and using hash tables with open addressing. The current |
12 | | entries are more compact in comparison with the original ones and |
13 | | this also improves the data locality. |
14 | | |
15 | | The hash table has two arrays called *bins* and *entries*. |
16 | | |
17 | | bins: |
18 | | ------- |
19 | | | | entries array: |
20 | | |-------| -------------------------------- |
21 | | | index | | | entry: | | | |
22 | | |-------| | | | | | |
23 | | | ... | | ... | hash | ... | ... | |
24 | | |-------| | | key | | | |
25 | | | empty | | | record | | | |
26 | | |-------| -------------------------------- |
27 | | | ... | ^ ^ |
28 | | |-------| |_ entries start |_ entries bound |
29 | | |deleted| |
30 | | ------- |
31 | | |
32 | | o The entry array contains table entries in the same order as they |
33 | | were inserted. |
34 | | |
35 | | When the first entry is deleted, a variable containing index of |
36 | | the current first entry (*entries start*) is changed. In all |
37 | | other cases of the deletion, we just mark the entry as deleted by |
38 | | using a reserved hash value. |
39 | | |
40 | | Such organization of the entry storage makes operations of the |
41 | | table shift and the entries traversal very fast. |
42 | | |
43 | | o The bins provide access to the entries by their keys. The |
44 | | key hash is mapped to a bin containing *index* of the |
45 | | corresponding entry in the entry array. |
46 | | |
47 | | The bin array size is always power of two, it makes mapping very |
48 | | fast by using the corresponding lower bits of the hash. |
49 | | Generally it is not a good idea to ignore some part of the hash. |
50 | | But alternative approach is worse. For example, we could use a |
51 | | modulo operation for mapping and a prime number for the size of |
52 | | the bin array. Unfortunately, the modulo operation for big |
53 | | 64-bit numbers are extremely slow (it takes more than 100 cycles |
54 | | on modern Intel CPUs). |
55 | | |
56 | | Still other bits of the hash value are used when the mapping |
57 | | results in a collision. In this case we use a secondary hash |
58 | | value which is a result of a function of the collision bin |
59 | | index and the original hash value. The function choice |
60 | | guarantees that we can traverse all bins and finally find the |
61 | | corresponding bin as after several iterations the function |
62 | | becomes a full cycle linear congruential generator because it |
63 | | satisfies requirements of the Hull-Dobell theorem. |
64 | | |
65 | | When an entry is removed from the table besides marking the |
66 | | hash in the corresponding entry described above, we also mark |
67 | | the bin by a special value in order to find entries which had |
68 | | a collision with the removed entries. |
69 | | |
70 | | There are two reserved values for the bins. One denotes an |
71 | | empty bin, another one denotes a bin for a deleted entry. |
72 | | |
73 | | o The length of the bin array is at least two times more than the |
74 | | entry array length. This keeps the table load factor healthy. |
75 | | The trigger of rebuilding the table is always a case when we can |
76 | | not insert an entry anymore at the entries bound. We could |
77 | | change the entries bound too in case of deletion but than we need |
78 | | a special code to count bins with corresponding deleted entries |
79 | | and reset the bin values when there are too many bins |
80 | | corresponding deleted entries |
81 | | |
82 | | Table rebuilding is done by creation of a new entry array and |
83 | | bins of an appropriate size. We also try to reuse the arrays |
84 | | in some cases by compacting the array and removing deleted |
85 | | entries. |
86 | | |
87 | | o To save memory very small tables have no allocated arrays |
88 | | bins. We use a linear search for an access by a key. |
89 | | |
90 | | o To save more memory we use 8-, 16-, 32- and 64- bit indexes in |
91 | | bins depending on the current hash table size. |
92 | | |
93 | | o The implementation takes into account that the table can be |
94 | | rebuilt during hashing or comparison functions. It can happen if |
95 | | the functions are implemented in Ruby and a thread switch occurs |
96 | | during their execution. |
97 | | |
98 | | This implementation speeds up the Ruby hash table benchmarks in |
99 | | average by more 40% on Intel Haswell CPU. |
100 | | |
101 | | */ |
102 | | |
103 | | #ifdef RUBY |
104 | | #include "internal.h" |
105 | | #else |
106 | | #include "regint.h" |
107 | | #include "st.h" |
108 | | #endif |
109 | | |
110 | | #include <stdio.h> |
111 | | #include <stdlib.h> |
112 | | #include <string.h> |
113 | | #include <assert.h> |
114 | | |
115 | | #ifdef __GNUC__ |
116 | 0 | #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p) |
117 | 69.9k | #define EXPECT(expr, val) __builtin_expect(expr, val) |
118 | | #define ATTRIBUTE_UNUSED __attribute__((unused)) |
119 | | #else |
120 | | #define PREFETCH(addr, write_p) |
121 | | #define EXPECT(expr, val) (expr) |
122 | | #define ATTRIBUTE_UNUSED |
123 | | #endif |
124 | | |
125 | | #ifdef ST_DEBUG |
126 | | #define st_assert assert |
127 | | #else |
128 | 20.2k | #define st_assert(cond) ((void)(0 && (cond))) |
129 | | #endif |
130 | | |
131 | | /* The type of hashes. */ |
132 | | typedef st_index_t st_hash_t; |
133 | | |
134 | | struct st_table_entry { |
135 | | st_hash_t hash; |
136 | | st_data_t key; |
137 | | st_data_t record; |
138 | | }; |
139 | | |
140 | | #ifdef RUBY |
141 | | #define type_numhash st_hashtype_num |
142 | | static const struct st_hash_type st_hashtype_num = { |
143 | | st_numcmp, |
144 | | st_numhash, |
145 | | }; |
146 | | |
147 | | /* extern int strcmp(const char *, const char *); */ |
148 | | static st_index_t strhash(st_data_t); |
149 | | static const struct st_hash_type type_strhash = { |
150 | | strcmp, |
151 | | strhash, |
152 | | }; |
153 | | |
154 | | static st_index_t strcasehash(st_data_t); |
155 | | static const struct st_hash_type type_strcasehash = { |
156 | | st_locale_insensitive_strcasecmp, |
157 | | strcasehash, |
158 | | }; |
159 | | #endif /* RUBY */ |
160 | | |
161 | | /* Value used to catch uninitialized entries/bins during debugging. |
162 | | There is a possibility for a false alarm, but its probability is |
163 | | extremely small. */ |
164 | | #define ST_INIT_VAL 0xafafafafafafafaf |
165 | | #define ST_INIT_VAL_BYTE 0xafa |
166 | | |
167 | | #ifdef RUBY |
168 | | #undef malloc |
169 | | #undef realloc |
170 | | #undef calloc |
171 | | #undef free |
172 | | #define malloc ruby_xmalloc |
173 | | #define calloc ruby_xcalloc |
174 | | #define realloc ruby_xrealloc |
175 | | #define free ruby_xfree |
176 | | #else /* RUBY */ |
177 | | #define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n)) |
178 | | #endif /* RUBY */ |
179 | | |
180 | 9.02k | #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) |
181 | | #define PTR_EQUAL(tab, ptr, hash_val, key_) \ |
182 | 36.0k | ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) |
183 | | |
184 | | /* As PRT_EQUAL only its result is returned in RES. REBUILT_P is set |
185 | | up to TRUE if the table is rebuilt during the comparison. */ |
186 | | #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ |
187 | 36.0k | do { \ |
188 | 36.0k | unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \ |
189 | 36.0k | res = PTR_EQUAL(tab, ptr, hash_val, key); \ |
190 | 36.0k | rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \ |
191 | 36.0k | } while (FALSE) |
192 | | |
193 | | /* Features of a table. */ |
194 | | struct st_features { |
195 | | /* Power of 2 used for number of allocated entries. */ |
196 | | unsigned char entry_power; |
197 | | /* Power of 2 used for number of allocated bins. Depending on the |
198 | | table size, the number of bins is 2-4 times more than the |
199 | | number of entries. */ |
200 | | unsigned char bin_power; |
201 | | /* Enumeration of sizes of bins (8-bit, 16-bit etc). */ |
202 | | unsigned char size_ind; |
203 | | /* Bins are packed in words of type st_index_t. The following is |
204 | | a size of bins counted by words. */ |
205 | | st_index_t bins_words; |
206 | | }; |
207 | | |
208 | | /* Features of all possible size tables. */ |
209 | | #if SIZEOF_ST_INDEX_T == 8 |
210 | 2.25k | #define MAX_POWER2 62 |
211 | | static const struct st_features features[] = { |
212 | | {0, 1, 0, 0x0}, |
213 | | {1, 2, 0, 0x1}, |
214 | | {2, 3, 0, 0x1}, |
215 | | {3, 4, 0, 0x2}, |
216 | | {4, 5, 0, 0x4}, |
217 | | {5, 6, 0, 0x8}, |
218 | | {6, 7, 0, 0x10}, |
219 | | {7, 8, 0, 0x20}, |
220 | | {8, 9, 1, 0x80}, |
221 | | {9, 10, 1, 0x100}, |
222 | | {10, 11, 1, 0x200}, |
223 | | {11, 12, 1, 0x400}, |
224 | | {12, 13, 1, 0x800}, |
225 | | {13, 14, 1, 0x1000}, |
226 | | {14, 15, 1, 0x2000}, |
227 | | {15, 16, 1, 0x4000}, |
228 | | {16, 17, 2, 0x10000}, |
229 | | {17, 18, 2, 0x20000}, |
230 | | {18, 19, 2, 0x40000}, |
231 | | {19, 20, 2, 0x80000}, |
232 | | {20, 21, 2, 0x100000}, |
233 | | {21, 22, 2, 0x200000}, |
234 | | {22, 23, 2, 0x400000}, |
235 | | {23, 24, 2, 0x800000}, |
236 | | {24, 25, 2, 0x1000000}, |
237 | | {25, 26, 2, 0x2000000}, |
238 | | {26, 27, 2, 0x4000000}, |
239 | | {27, 28, 2, 0x8000000}, |
240 | | {28, 29, 2, 0x10000000}, |
241 | | {29, 30, 2, 0x20000000}, |
242 | | {30, 31, 2, 0x40000000}, |
243 | | {31, 32, 2, 0x80000000}, |
244 | | {32, 33, 3, 0x200000000}, |
245 | | {33, 34, 3, 0x400000000}, |
246 | | {34, 35, 3, 0x800000000}, |
247 | | {35, 36, 3, 0x1000000000}, |
248 | | {36, 37, 3, 0x2000000000}, |
249 | | {37, 38, 3, 0x4000000000}, |
250 | | {38, 39, 3, 0x8000000000}, |
251 | | {39, 40, 3, 0x10000000000}, |
252 | | {40, 41, 3, 0x20000000000}, |
253 | | {41, 42, 3, 0x40000000000}, |
254 | | {42, 43, 3, 0x80000000000}, |
255 | | {43, 44, 3, 0x100000000000}, |
256 | | {44, 45, 3, 0x200000000000}, |
257 | | {45, 46, 3, 0x400000000000}, |
258 | | {46, 47, 3, 0x800000000000}, |
259 | | {47, 48, 3, 0x1000000000000}, |
260 | | {48, 49, 3, 0x2000000000000}, |
261 | | {49, 50, 3, 0x4000000000000}, |
262 | | {50, 51, 3, 0x8000000000000}, |
263 | | {51, 52, 3, 0x10000000000000}, |
264 | | {52, 53, 3, 0x20000000000000}, |
265 | | {53, 54, 3, 0x40000000000000}, |
266 | | {54, 55, 3, 0x80000000000000}, |
267 | | {55, 56, 3, 0x100000000000000}, |
268 | | {56, 57, 3, 0x200000000000000}, |
269 | | {57, 58, 3, 0x400000000000000}, |
270 | | {58, 59, 3, 0x800000000000000}, |
271 | | {59, 60, 3, 0x1000000000000000}, |
272 | | {60, 61, 3, 0x2000000000000000}, |
273 | | {61, 62, 3, 0x4000000000000000}, |
274 | | {62, 63, 3, 0x8000000000000000}, |
275 | | }; |
276 | | |
277 | | #else |
278 | | #define MAX_POWER2 30 |
279 | | |
280 | | static const struct st_features features[] = { |
281 | | {0, 1, 0, 0x1}, |
282 | | {1, 2, 0, 0x1}, |
283 | | {2, 3, 0, 0x2}, |
284 | | {3, 4, 0, 0x4}, |
285 | | {4, 5, 0, 0x8}, |
286 | | {5, 6, 0, 0x10}, |
287 | | {6, 7, 0, 0x20}, |
288 | | {7, 8, 0, 0x40}, |
289 | | {8, 9, 1, 0x100}, |
290 | | {9, 10, 1, 0x200}, |
291 | | {10, 11, 1, 0x400}, |
292 | | {11, 12, 1, 0x800}, |
293 | | {12, 13, 1, 0x1000}, |
294 | | {13, 14, 1, 0x2000}, |
295 | | {14, 15, 1, 0x4000}, |
296 | | {15, 16, 1, 0x8000}, |
297 | | {16, 17, 2, 0x20000}, |
298 | | {17, 18, 2, 0x40000}, |
299 | | {18, 19, 2, 0x80000}, |
300 | | {19, 20, 2, 0x100000}, |
301 | | {20, 21, 2, 0x200000}, |
302 | | {21, 22, 2, 0x400000}, |
303 | | {22, 23, 2, 0x800000}, |
304 | | {23, 24, 2, 0x1000000}, |
305 | | {24, 25, 2, 0x2000000}, |
306 | | {25, 26, 2, 0x4000000}, |
307 | | {26, 27, 2, 0x8000000}, |
308 | | {27, 28, 2, 0x10000000}, |
309 | | {28, 29, 2, 0x20000000}, |
310 | | {29, 30, 2, 0x40000000}, |
311 | | {30, 31, 2, 0x80000000}, |
312 | | }; |
313 | | |
314 | | #endif |
315 | | |
316 | | /* The reserved hash value and its substitution. */ |
317 | 24.8k | #define RESERVED_HASH_VAL (~(st_hash_t) 0) |
318 | 0 | #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0) |
319 | | |
320 | | const st_hash_t st_reserved_hash_val = RESERVED_HASH_VAL; |
321 | | const st_hash_t st_reserved_hash_substitution_val = RESERVED_HASH_SUBSTITUTION_VAL; |
322 | | |
323 | | /* Return hash value of KEY for table TAB. */ |
324 | | static inline st_hash_t |
325 | | do_hash(st_data_t key, st_table *tab) |
326 | 15.7k | { |
327 | 15.7k | st_hash_t hash = (st_hash_t)(tab->type->hash)(key); |
328 | | |
329 | | /* RESERVED_HASH_VAL is used for a deleted entry. Map it into |
330 | | another value. Such mapping should be extremely rare. */ |
331 | 15.7k | return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash; |
332 | 15.7k | } |
333 | | |
334 | | /* Power of 2 defining the minimal number of allocated entries. */ |
335 | 2.25k | #define MINIMAL_POWER2 2 |
336 | | |
337 | | #if MINIMAL_POWER2 < 2 |
338 | | #error "MINIMAL_POWER2 should be >= 2" |
339 | | #endif |
340 | | |
341 | | /* If the power2 of the allocated `entries` is less than the following |
342 | | value, don't allocate bins and use a linear search. */ |
343 | 2.25k | #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4 |
344 | | |
345 | | /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */ |
346 | | static int |
347 | | get_power2(st_index_t size) |
348 | 2.25k | { |
349 | 2.25k | unsigned int n; |
350 | | |
351 | 9.02k | for (n = 0; size != 0; n++) |
352 | 6.76k | size >>= 1; |
353 | 2.25k | if (n <= MAX_POWER2) |
354 | 2.25k | return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n; |
355 | | #ifdef RUBY |
356 | | /* Ran out of the table entries */ |
357 | | rb_raise(rb_eRuntimeError, "st_table too big"); |
358 | | #endif |
359 | | /* should raise exception */ |
360 | 0 | return -1; |
361 | 2.25k | } |
362 | | |
363 | | /* Return value of N-th bin in array BINS of table with bins size |
364 | | index S. */ |
365 | | static inline st_index_t |
366 | | get_bin(st_index_t *bins, int s, st_index_t n) |
367 | 0 | { |
368 | 0 | return (s == 0 ? ((unsigned char *) bins)[n] |
369 | 0 | : s == 1 ? ((unsigned short *) bins)[n] |
370 | 0 | : s == 2 ? ((unsigned int *) bins)[n] |
371 | 0 | : ((st_index_t *) bins)[n]); |
372 | 0 | } |
373 | | |
374 | | /* Set up N-th bin in array BINS of table with bins size index S to |
375 | | value V. */ |
376 | | static inline void |
377 | | set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) |
378 | 0 | { |
379 | 0 | if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v; |
380 | 0 | else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v; |
381 | 0 | else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v; |
382 | 0 | else ((st_index_t *) bins)[n] = v; |
383 | 0 | } |
384 | | |
385 | | /* These macros define reserved values for empty table bin and table |
386 | | bin which contains a deleted entry. We will never use such values |
387 | | for an entry index in bins. */ |
388 | 0 | #define EMPTY_BIN 0 |
389 | 0 | #define DELETED_BIN 1 |
390 | | /* Base of a real entry index in the bins. */ |
391 | 0 | #define ENTRY_BASE 2 |
392 | | |
393 | | /* Mark I-th bin of table TAB as empty, in other words not |
394 | | corresponding to any entry. */ |
395 | 0 | #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN)) |
396 | | |
397 | | /* Values used for not found entry and bin with given |
398 | | characteristics. */ |
399 | 40.5k | #define UNDEFINED_ENTRY_IND (~(st_index_t) 0) |
400 | 18.0k | #define UNDEFINED_BIN_IND (~(st_index_t) 0) |
401 | | |
402 | | /* Entry and bin values returned when we found a table rebuild during |
403 | | the search. */ |
404 | 0 | #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1) |
405 | 0 | #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1) |
406 | | |
407 | | /* Mark I-th bin of table TAB as corresponding to a deleted table |
408 | | entry. Update number of entries in the table and number of bins |
409 | | corresponding to deleted entries. */ |
410 | | #define MARK_BIN_DELETED(tab, i) \ |
411 | 0 | do { \ |
412 | 0 | st_assert(i != UNDEFINED_BIN_IND); \ |
413 | 0 | st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \ |
414 | 0 | set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \ |
415 | 0 | } while (0) |
416 | | |
417 | | /* Macros to check that value B is used empty bins and bins |
418 | | corresponding deleted entries. */ |
419 | 0 | #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN) |
420 | 0 | #define DELETED_BIN_P(b) ((b) == DELETED_BIN) |
421 | 0 | #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN) |
422 | | |
423 | | /* Macros to check empty bins and bins corresponding to deleted |
424 | | entries. Bins are given by their index I in table TAB. */ |
425 | | #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) |
426 | | #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) |
427 | | #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) |
428 | | |
429 | | /* Macros for marking and checking deleted entries given by their |
430 | | pointer E_PTR. */ |
431 | 9.02k | #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL) |
432 | | #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL) |
433 | | |
434 | | /* Return bin size index of table TAB. */ |
435 | | static inline unsigned int |
436 | | get_size_ind(const st_table *tab) |
437 | 0 | { |
438 | 0 | return tab->size_ind; |
439 | 0 | } |
440 | | |
441 | | /* Return the number of allocated bins of table TAB. */ |
442 | | static inline st_index_t |
443 | | get_bins_num(const st_table *tab) |
444 | 0 | { |
445 | 0 | return ((st_index_t) 1)<<tab->bin_power; |
446 | 0 | } |
447 | | |
448 | | /* Return mask for a bin index in table TAB. */ |
449 | | static inline st_index_t |
450 | | bins_mask(const st_table *tab) |
451 | 0 | { |
452 | 0 | return get_bins_num(tab) - 1; |
453 | 0 | } |
454 | | |
455 | | /* Return the index of table TAB bin corresponding to |
456 | | HASH_VALUE. */ |
457 | | static inline st_index_t |
458 | | hash_bin(st_hash_t hash_value, st_table *tab) |
459 | 0 | { |
460 | 0 | return hash_value & bins_mask(tab); |
461 | 0 | } |
462 | | |
463 | | /* Return the number of allocated entries of table TAB. */ |
464 | | static inline st_index_t |
465 | | get_allocated_entries(const st_table *tab) |
466 | 11.2k | { |
467 | 11.2k | return ((st_index_t) 1)<<tab->entry_power; |
468 | 11.2k | } |
469 | | |
470 | | /* Return size of the allocated bins of table TAB. */ |
471 | | static inline st_index_t |
472 | | bins_size(const st_table *tab) |
473 | 0 | { |
474 | 0 | return features[tab->entry_power].bins_words * sizeof (st_index_t); |
475 | 0 | } |
476 | | |
477 | | /* Mark all bins of table TAB as empty. */ |
478 | | static void |
479 | | initialize_bins(st_table *tab) |
480 | 0 | { |
481 | 0 | memset(tab->bins, 0, bins_size(tab)); |
482 | 0 | } |
483 | | |
484 | | /* Make table TAB empty. */ |
485 | | static void |
486 | | make_tab_empty(st_table *tab) |
487 | 2.25k | { |
488 | 2.25k | tab->num_entries = 0; |
489 | 2.25k | tab->entries_start = tab->entries_bound = 0; |
490 | 2.25k | if (tab->bins != NULL) |
491 | 0 | initialize_bins(tab); |
492 | 2.25k | } |
493 | | |
494 | | #ifdef ST_DEBUG |
495 | | #define st_assert_notinitial(ent) \ |
496 | | do { \ |
497 | | st_assert(ent.hash != (st_hash_t) ST_INIT_VAL); \ |
498 | | st_assert(ent.key != ST_INIT_VAL); \ |
499 | | st_assert(ent.record != ST_INIT_VAL); \ |
500 | | } while (0) |
501 | | /* Check the table T consistency. It can be extremely slow. So use |
502 | | it only for debugging. */ |
503 | | static void |
504 | | st_check(st_table *tab) |
505 | | { |
506 | | st_index_t d, e, i, n, p; |
507 | | |
508 | | for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1) |
509 | | ; |
510 | | p = i; |
511 | | st_assert(p >= MINIMAL_POWER2); |
512 | | st_assert(tab->entries_bound <= get_allocated_entries(tab)); |
513 | | st_assert(tab->entries_start <= tab->entries_bound); |
514 | | n = 0; |
515 | | return; |
516 | | if (tab->entries_bound != 0) |
517 | | for (i = tab->entries_start; i < tab->entries_bound; i++) { |
518 | | st_assert_notinitial(tab->entries[i]); |
519 | | if (! DELETED_ENTRY_P(&tab->entries[i])) |
520 | | n++; |
521 | | } |
522 | | st_assert(n == tab->num_entries); |
523 | | if (tab->bins == NULL) |
524 | | st_assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS); |
525 | | else { |
526 | | st_assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS); |
527 | | for (n = d = i = 0; i < get_bins_num(tab); i++) { |
528 | | st_assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL); |
529 | | if (IND_DELETED_BIN_P(tab, i)) { |
530 | | d++; |
531 | | continue; |
532 | | } |
533 | | else if (IND_EMPTY_BIN_P(tab, i)) |
534 | | continue; |
535 | | n++; |
536 | | e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE; |
537 | | st_assert(tab->entries_start <= e && e < tab->entries_bound); |
538 | | st_assert(! DELETED_ENTRY_P(&tab->entries[e])); |
539 | | st_assert_notinitial(tab->entries[e]); |
540 | | } |
541 | | st_assert(n == tab->num_entries); |
542 | | st_assert(n + d < get_bins_num(tab)); |
543 | | } |
544 | | } |
545 | | #endif |
546 | | |
547 | | #ifdef HASH_LOG |
548 | | #ifdef HAVE_UNISTD_H |
549 | | #include <unistd.h> |
550 | | #endif |
551 | | static struct { |
552 | | int all, total, num, str, strcase; |
553 | | } collision; |
554 | | |
555 | | /* Flag switching off output of package statistics at the end of |
556 | | program. */ |
557 | | static int init_st = 0; |
558 | | |
559 | | /* Output overall number of table searches and collisions into a |
560 | | temporary file. */ |
561 | | static void |
562 | | stat_col(void) |
563 | | { |
564 | | char fname[10+sizeof(long)*3]; |
565 | | FILE *f; |
566 | | if (!collision.total) return; |
567 | | f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w"); |
568 | | if (f == NULL) |
569 | | return; |
570 | | fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total, |
571 | | ((double)collision.all / (collision.total)) * 100); |
572 | | fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase); |
573 | | fclose(f); |
574 | | } |
575 | | #endif |
576 | | |
577 | | /* Create and return table with TYPE which can hold at least SIZE |
578 | | entries. The real number of entries which the table can hold is |
579 | | the nearest power of two for SIZE. */ |
580 | | st_table * |
581 | | st_init_table_with_size(const struct st_hash_type *type, st_index_t size) |
582 | 2.25k | { |
583 | 2.25k | st_table *tab; |
584 | 2.25k | int n; |
585 | | |
586 | | #ifdef HASH_LOG |
587 | | #if HASH_LOG+0 < 0 |
588 | | { |
589 | | const char *e = getenv("ST_HASH_LOG"); |
590 | | if (!e || !*e) init_st = 1; |
591 | | } |
592 | | #endif |
593 | | if (init_st == 0) { |
594 | | init_st = 1; |
595 | | atexit(stat_col); |
596 | | } |
597 | | #endif |
598 | | |
599 | 2.25k | n = get_power2(size); |
600 | 2.25k | #ifndef RUBY |
601 | 2.25k | if (n < 0) |
602 | 0 | return NULL; |
603 | 2.25k | #endif |
604 | 2.25k | tab = (st_table *) malloc(sizeof (st_table)); |
605 | 2.25k | #ifndef RUBY |
606 | 2.25k | if (tab == NULL) |
607 | 0 | return NULL; |
608 | 2.25k | #endif |
609 | 2.25k | tab->type = type; |
610 | 2.25k | tab->entry_power = n; |
611 | 2.25k | tab->bin_power = features[n].bin_power; |
612 | 2.25k | tab->size_ind = features[n].size_ind; |
613 | 2.25k | if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) |
614 | 2.25k | tab->bins = NULL; |
615 | 0 | else { |
616 | 0 | tab->bins = (st_index_t *) malloc(bins_size(tab)); |
617 | 0 | #ifndef RUBY |
618 | 0 | if (tab->bins == NULL) { |
619 | 0 | free(tab); |
620 | 0 | return NULL; |
621 | 0 | } |
622 | 0 | #endif |
623 | 0 | } |
624 | 2.25k | tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab) |
625 | 2.25k | * sizeof(st_table_entry)); |
626 | 2.25k | #ifndef RUBY |
627 | 2.25k | if (tab->entries == NULL) { |
628 | 0 | st_free_table(tab); |
629 | 0 | return NULL; |
630 | 0 | } |
631 | 2.25k | #endif |
632 | | #ifdef ST_DEBUG |
633 | | memset(tab->entries, ST_INIT_VAL_BYTE, |
634 | | get_allocated_entries(tab) * sizeof(st_table_entry)); |
635 | | if (tab->bins != NULL) |
636 | | memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab)); |
637 | | #endif |
638 | 2.25k | make_tab_empty(tab); |
639 | 2.25k | tab->rebuilds_num = 0; |
640 | | #ifdef ST_DEBUG |
641 | | st_check(tab); |
642 | | #endif |
643 | 2.25k | return tab; |
644 | 2.25k | } |
645 | | |
646 | | #ifdef RUBY |
647 | | /* Create and return table with TYPE which can hold a minimal number |
648 | | of entries (see comments for get_power2). */ |
649 | | st_table * |
650 | | st_init_table(const struct st_hash_type *type) |
651 | | { |
652 | | return st_init_table_with_size(type, 0); |
653 | | } |
654 | | |
655 | | /* Create and return table which can hold a minimal number of |
656 | | numbers. */ |
657 | | st_table * |
658 | | st_init_numtable(void) |
659 | | { |
660 | | return st_init_table(&type_numhash); |
661 | | } |
662 | | |
663 | | /* Create and return table which can hold SIZE numbers. */ |
664 | | st_table * |
665 | | st_init_numtable_with_size(st_index_t size) |
666 | | { |
667 | | return st_init_table_with_size(&type_numhash, size); |
668 | | } |
669 | | |
670 | | /* Create and return table which can hold a minimal number of |
671 | | strings. */ |
672 | | st_table * |
673 | | st_init_strtable(void) |
674 | | { |
675 | | return st_init_table(&type_strhash); |
676 | | } |
677 | | |
678 | | /* Create and return table which can hold SIZE strings. */ |
679 | | st_table * |
680 | | st_init_strtable_with_size(st_index_t size) |
681 | | { |
682 | | return st_init_table_with_size(&type_strhash, size); |
683 | | } |
684 | | |
685 | | /* Create and return table which can hold a minimal number of strings |
686 | | whose character case is ignored. */ |
687 | | st_table * |
688 | | st_init_strcasetable(void) |
689 | | { |
690 | | return st_init_table(&type_strcasehash); |
691 | | } |
692 | | |
693 | | /* Create and return table which can hold SIZE strings whose character |
694 | | case is ignored. */ |
695 | | st_table * |
696 | | st_init_strcasetable_with_size(st_index_t size) |
697 | | { |
698 | | return st_init_table_with_size(&type_strcasehash, size); |
699 | | } |
700 | | |
701 | | /* Make table TAB empty. */ |
702 | | void |
703 | | st_clear(st_table *tab) |
704 | | { |
705 | | make_tab_empty(tab); |
706 | | tab->rebuilds_num++; |
707 | | #ifdef ST_DEBUG |
708 | | st_check(tab); |
709 | | #endif |
710 | | } |
711 | | #endif /* RUBY */ |
712 | | |
713 | | /* Free table TAB space. */ |
714 | | void |
715 | | st_free_table(st_table *tab) |
716 | 2.25k | { |
717 | 2.25k | if (tab->bins != NULL) |
718 | 0 | free(tab->bins); |
719 | 2.25k | free(tab->entries); |
720 | 2.25k | free(tab); |
721 | 2.25k | } |
722 | | |
723 | | #ifdef RUBY |
724 | | /* Return byte size of memory allocted for table TAB. */ |
725 | | size_t |
726 | | st_memsize(const st_table *tab) |
727 | | { |
728 | | return(sizeof(st_table) |
729 | | + (tab->bins == NULL ? 0 : bins_size(tab)) |
730 | | + get_allocated_entries(tab) * sizeof(st_table_entry)); |
731 | | } |
732 | | #endif /* RUBY */ |
733 | | |
734 | | static st_index_t |
735 | | find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key); |
736 | | |
737 | | static st_index_t |
738 | | find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key); |
739 | | |
740 | | static st_index_t |
741 | | find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key); |
742 | | |
743 | | static st_index_t |
744 | | find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, |
745 | | st_data_t key, st_index_t *bin_ind); |
746 | | |
747 | | #ifdef HASH_LOG |
748 | | static void |
749 | | count_collision(const struct st_hash_type *type) |
750 | | { |
751 | | collision.all++; |
752 | | if (type == &type_numhash) { |
753 | | collision.num++; |
754 | | } |
755 | | else if (type == &type_strhash) { |
756 | | collision.strcase++; |
757 | | } |
758 | | else if (type == &type_strcasehash) { |
759 | | collision.str++; |
760 | | } |
761 | | } |
762 | | |
763 | | #define COLLISION (collision_check ? count_collision(tab->type) : (void)0) |
764 | | #define FOUND_BIN (collision_check ? collision.total++ : (void)0) |
765 | | #define collision_check 0 |
766 | | #else |
767 | | #define COLLISION |
768 | | #define FOUND_BIN |
769 | | #endif |
770 | | |
771 | | /* If the number of entries in the table is at least REBUILD_THRESHOLD |
772 | | times less than the entry array length, decrease the table |
773 | | size. */ |
774 | 0 | #define REBUILD_THRESHOLD 4 |
775 | | |
776 | | #if REBUILD_THRESHOLD < 2 |
777 | | #error "REBUILD_THRESHOLD should be >= 2" |
778 | | #endif |
779 | | |
780 | | /* Rebuild table TAB. Rebuilding removes all deleted bins and entries |
781 | | and can change size of the table entries and bins arrays. |
782 | | Rebuilding is implemented by creation of a new table or by |
783 | | compaction of the existing one. */ |
784 | | static void |
785 | | rebuild_table(st_table *tab) |
786 | 0 | { |
787 | 0 | st_index_t i, ni, bound; |
788 | 0 | unsigned int size_ind; |
789 | 0 | st_table *new_tab; |
790 | 0 | st_table_entry *entries, *new_entries; |
791 | 0 | st_table_entry *curr_entry_ptr; |
792 | 0 | st_index_t *bins; |
793 | 0 | st_index_t bin_ind; |
794 | |
|
795 | 0 | st_assert(tab != NULL); |
796 | 0 | bound = tab->entries_bound; |
797 | 0 | entries = tab->entries; |
798 | 0 | if ((2 * tab->num_entries <= get_allocated_entries(tab) |
799 | 0 | && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) |
800 | 0 | || tab->num_entries < (1 << MINIMAL_POWER2)) { |
801 | | /* Compaction: */ |
802 | 0 | tab->num_entries = 0; |
803 | 0 | if (tab->bins != NULL) |
804 | 0 | initialize_bins(tab); |
805 | 0 | new_tab = tab; |
806 | 0 | new_entries = entries; |
807 | 0 | } |
808 | 0 | else { |
809 | 0 | new_tab = st_init_table_with_size(tab->type, |
810 | 0 | 2 * tab->num_entries - 1); |
811 | 0 | new_entries = new_tab->entries; |
812 | 0 | } |
813 | 0 | ni = 0; |
814 | 0 | bins = new_tab->bins; |
815 | 0 | size_ind = get_size_ind(new_tab); |
816 | 0 | for (i = tab->entries_start; i < bound; i++) { |
817 | 0 | curr_entry_ptr = &entries[i]; |
818 | 0 | PREFETCH(entries + i + 1, 0); |
819 | 0 | if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) |
820 | 0 | continue; |
821 | 0 | if (&new_entries[ni] != curr_entry_ptr) |
822 | 0 | new_entries[ni] = *curr_entry_ptr; |
823 | 0 | if (EXPECT(bins != NULL, 1)) { |
824 | 0 | bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash, |
825 | 0 | curr_entry_ptr->key); |
826 | 0 | st_assert(bin_ind != UNDEFINED_BIN_IND); |
827 | 0 | st_assert(tab == new_tab || new_tab->rebuilds_num == 0); |
828 | 0 | st_assert(IND_EMPTY_BIN_P(new_tab, bin_ind)); |
829 | 0 | set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE); |
830 | 0 | } |
831 | 0 | new_tab->num_entries++; |
832 | 0 | ni++; |
833 | 0 | } |
834 | 0 | if (new_tab != tab) { |
835 | 0 | tab->entry_power = new_tab->entry_power; |
836 | 0 | tab->bin_power = new_tab->bin_power; |
837 | 0 | tab->size_ind = new_tab->size_ind; |
838 | 0 | st_assert(tab->num_entries == ni); |
839 | 0 | st_assert(new_tab->num_entries == ni); |
840 | 0 | if (tab->bins != NULL) |
841 | 0 | free(tab->bins); |
842 | 0 | tab->bins = new_tab->bins; |
843 | 0 | free(tab->entries); |
844 | 0 | tab->entries = new_tab->entries; |
845 | 0 | free(new_tab); |
846 | 0 | } |
847 | 0 | tab->entries_start = 0; |
848 | 0 | tab->entries_bound = tab->num_entries; |
849 | 0 | tab->rebuilds_num++; |
850 | | #ifdef ST_DEBUG |
851 | | st_check(tab); |
852 | | #endif |
853 | 0 | } |
854 | | |
855 | | /* Return the next secondary hash index for table TAB using previous |
856 | | index IND and PERTERB. Finally modulo of the function becomes a |
857 | | full *cycle linear congruential generator*, in other words it |
858 | | guarantees traversing all table bins in extreme case. |
859 | | |
860 | | According the Hull-Dobell theorem a generator |
861 | | "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff |
862 | | o m and c are relatively prime |
863 | | o a-1 is divisible by all prime factors of m |
864 | | o a-1 is divisible by 4 if m is divisible by 4. |
865 | | |
866 | | For our case a is 5, c is 1, and m is a power of two. */ |
867 | | static inline st_index_t |
868 | | secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) |
869 | 0 | { |
870 | 0 | *perterb >>= 11; |
871 | 0 | ind = (ind << 2) + ind + *perterb + 1; |
872 | 0 | return hash_bin(ind, tab); |
873 | 0 | } |
874 | | |
875 | | /* Find an entry with HASH_VALUE and KEY in TABLE using a linear |
876 | | search. Return the index of the found entry in array `entries`. |
877 | | If it is not found, return UNDEFINED_ENTRY_IND. If the table was |
878 | | rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ |
879 | | static inline st_index_t |
880 | | find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) |
881 | 24.8k | { |
882 | 24.8k | int eq_p, rebuilt_p; |
883 | 24.8k | st_index_t i, bound; |
884 | 24.8k | st_table_entry *entries; |
885 | | |
886 | 24.8k | bound = tab->entries_bound; |
887 | 24.8k | entries = tab->entries; |
888 | 51.8k | for (i = tab->entries_start; i < bound; i++) { |
889 | 36.0k | DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); |
890 | 36.0k | if (EXPECT(rebuilt_p, 0)) |
891 | 0 | return REBUILT_TABLE_ENTRY_IND; |
892 | 36.0k | if (eq_p) |
893 | 9.02k | return i; |
894 | 36.0k | } |
895 | 15.7k | return UNDEFINED_ENTRY_IND; |
896 | 24.8k | } |
897 | | |
898 | | /* Use the quadratic probing. The method has a better data locality |
899 | | but more collisions than the current approach. In average it |
900 | | results in a bit slower search. */ |
901 | | /*#define QUADRATIC_PROBE*/ |
902 | | |
903 | | /* Return index of entry with HASH_VALUE and KEY in table TAB. If |
904 | | there is no such entry, return UNDEFINED_ENTRY_IND. If the table |
905 | | was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ |
906 | | static st_index_t |
907 | | find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) |
908 | 0 | { |
909 | 0 | int eq_p, rebuilt_p; |
910 | 0 | st_index_t ind; |
911 | | #ifdef QUADRATIC_PROBE |
912 | | st_index_t d; |
913 | | #else |
914 | 0 | st_index_t peterb; |
915 | 0 | #endif |
916 | 0 | st_index_t bin; |
917 | 0 | st_table_entry *entries = tab->entries; |
918 | |
|
919 | 0 | st_assert(tab != NULL); |
920 | 0 | st_assert(tab->bins != NULL); |
921 | 0 | ind = hash_bin(hash_value, tab); |
922 | | #ifdef QUADRATIC_PROBE |
923 | | d = 1; |
924 | | #else |
925 | 0 | peterb = hash_value; |
926 | 0 | #endif |
927 | 0 | FOUND_BIN; |
928 | 0 | for (;;) { |
929 | 0 | bin = get_bin(tab->bins, get_size_ind(tab), ind); |
930 | 0 | if (! EMPTY_OR_DELETED_BIN_P(bin)) { |
931 | 0 | DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); |
932 | 0 | if (EXPECT(rebuilt_p, 0)) |
933 | 0 | return REBUILT_TABLE_ENTRY_IND; |
934 | 0 | if (eq_p) |
935 | 0 | break; |
936 | 0 | } else if (EMPTY_BIN_P(bin)) |
937 | 0 | return UNDEFINED_ENTRY_IND; |
938 | | #ifdef QUADRATIC_PROBE |
939 | | ind = hash_bin(ind + d, tab); |
940 | | d++; |
941 | | #else |
942 | 0 | ind = secondary_hash(ind, tab, &peterb); |
943 | 0 | #endif |
944 | 0 | COLLISION; |
945 | 0 | } |
946 | 0 | return bin; |
947 | 0 | } |
948 | | |
949 | | /* Find and return index of table TAB bin corresponding to an entry |
950 | | with HASH_VALUE and KEY. If there is no such bin, return |
951 | | UNDEFINED_BIN_IND. If the table was rebuilt during the search, |
952 | | return REBUILT_TABLE_BIN_IND. */ |
953 | | static st_index_t |
954 | | find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) |
955 | 0 | { |
956 | 0 | int eq_p, rebuilt_p; |
957 | 0 | st_index_t ind; |
958 | | #ifdef QUADRATIC_PROBE |
959 | | st_index_t d; |
960 | | #else |
961 | 0 | st_index_t peterb; |
962 | 0 | #endif |
963 | 0 | st_index_t bin; |
964 | 0 | st_table_entry *entries = tab->entries; |
965 | |
|
966 | 0 | st_assert(tab != NULL); |
967 | 0 | st_assert(tab->bins != NULL); |
968 | 0 | ind = hash_bin(hash_value, tab); |
969 | | #ifdef QUADRATIC_PROBE |
970 | | d = 1; |
971 | | #else |
972 | 0 | peterb = hash_value; |
973 | 0 | #endif |
974 | 0 | FOUND_BIN; |
975 | 0 | for (;;) { |
976 | 0 | bin = get_bin(tab->bins, get_size_ind(tab), ind); |
977 | 0 | if (! EMPTY_OR_DELETED_BIN_P(bin)) { |
978 | 0 | DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); |
979 | 0 | if (EXPECT(rebuilt_p, 0)) |
980 | 0 | return REBUILT_TABLE_BIN_IND; |
981 | 0 | if (eq_p) |
982 | 0 | break; |
983 | 0 | } else if (EMPTY_BIN_P(bin)) |
984 | 0 | return UNDEFINED_BIN_IND; |
985 | | #ifdef QUADRATIC_PROBE |
986 | | ind = hash_bin(ind + d, tab); |
987 | | d++; |
988 | | #else |
989 | 0 | ind = secondary_hash(ind, tab, &peterb); |
990 | 0 | #endif |
991 | 0 | COLLISION; |
992 | 0 | } |
993 | 0 | return ind; |
994 | 0 | } |
995 | | |
996 | | /* Find and return index of table TAB bin corresponding to an entry |
997 | | with HASH_VALUE and KEY. The entry should be in the table |
998 | | already. */ |
999 | | static st_index_t |
1000 | | find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) |
1001 | 0 | { |
1002 | 0 | st_index_t ind; |
1003 | | #ifdef QUADRATIC_PROBE |
1004 | | st_index_t d; |
1005 | | #else |
1006 | 0 | st_index_t peterb; |
1007 | 0 | #endif |
1008 | 0 | st_index_t bin; |
1009 | 0 | st_table_entry *entries = tab->entries; |
1010 | |
|
1011 | 0 | st_assert(tab != NULL); |
1012 | 0 | st_assert(tab->bins != NULL); |
1013 | 0 | ind = hash_bin(hash_value, tab); |
1014 | | #ifdef QUADRATIC_PROBE |
1015 | | d = 1; |
1016 | | #else |
1017 | 0 | peterb = hash_value; |
1018 | 0 | #endif |
1019 | 0 | FOUND_BIN; |
1020 | 0 | for (;;) { |
1021 | 0 | bin = get_bin(tab->bins, get_size_ind(tab), ind); |
1022 | 0 | if (EMPTY_OR_DELETED_BIN_P(bin)) |
1023 | 0 | return ind; |
1024 | 0 | st_assert (entries[bin - ENTRY_BASE].hash != hash_value); |
1025 | | #ifdef QUADRATIC_PROBE |
1026 | | ind = hash_bin(ind + d, tab); |
1027 | | d++; |
1028 | | #else |
1029 | 0 | ind = secondary_hash(ind, tab, &peterb); |
1030 | 0 | #endif |
1031 | 0 | COLLISION; |
1032 | 0 | } |
1033 | 0 | } |
1034 | | |
1035 | | /* Return index of table TAB bin for HASH_VALUE and KEY through |
1036 | | BIN_IND and the pointed value as the function result. Reserve the |
1037 | | bin for inclusion of the corresponding entry into the table if it |
1038 | | is not there yet. We always find such bin as bins array length is |
1039 | | bigger entries array. Although we can reuse a deleted bin, the |
1040 | | result bin value is always empty if the table has no entry with |
1041 | | KEY. Return the entries array index of the found entry or |
1042 | | UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt |
1043 | | during the search, return REBUILT_TABLE_ENTRY_IND. */ |
1044 | | static st_index_t |
1045 | | find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, |
1046 | | st_data_t key, st_index_t *bin_ind) |
1047 | 0 | { |
1048 | 0 | int eq_p, rebuilt_p; |
1049 | 0 | st_index_t ind; |
1050 | 0 | st_hash_t curr_hash_value = *hash_value; |
1051 | | #ifdef QUADRATIC_PROBE |
1052 | | st_index_t d; |
1053 | | #else |
1054 | 0 | st_index_t peterb; |
1055 | 0 | #endif |
1056 | 0 | st_index_t entry_index; |
1057 | 0 | st_index_t first_deleted_bin_ind; |
1058 | 0 | st_table_entry *entries; |
1059 | |
|
1060 | 0 | st_assert(tab != NULL); |
1061 | 0 | st_assert(tab->bins != NULL); |
1062 | 0 | st_assert(tab->entries_bound <= get_allocated_entries(tab)); |
1063 | 0 | st_assert(tab->entries_start <= tab->entries_bound); |
1064 | 0 | ind = hash_bin(curr_hash_value, tab); |
1065 | | #ifdef QUADRATIC_PROBE |
1066 | | d = 1; |
1067 | | #else |
1068 | 0 | peterb = curr_hash_value; |
1069 | 0 | #endif |
1070 | 0 | FOUND_BIN; |
1071 | 0 | first_deleted_bin_ind = UNDEFINED_BIN_IND; |
1072 | 0 | entries = tab->entries; |
1073 | 0 | for (;;) { |
1074 | 0 | entry_index = get_bin(tab->bins, get_size_ind(tab), ind); |
1075 | 0 | if (EMPTY_BIN_P(entry_index)) { |
1076 | 0 | tab->num_entries++; |
1077 | 0 | entry_index = UNDEFINED_ENTRY_IND; |
1078 | 0 | if (first_deleted_bin_ind != UNDEFINED_BIN_IND) { |
1079 | | /* We can reuse bin of a deleted entry. */ |
1080 | 0 | ind = first_deleted_bin_ind; |
1081 | 0 | MARK_BIN_EMPTY(tab, ind); |
1082 | 0 | } |
1083 | 0 | break; |
1084 | 0 | } |
1085 | 0 | else if (! DELETED_BIN_P(entry_index)) { |
1086 | 0 | DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); |
1087 | 0 | if (EXPECT(rebuilt_p, 0)) |
1088 | 0 | return REBUILT_TABLE_ENTRY_IND; |
1089 | 0 | if (eq_p) |
1090 | 0 | break; |
1091 | 0 | } |
1092 | 0 | else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) |
1093 | 0 | first_deleted_bin_ind = ind; |
1094 | | #ifdef QUADRATIC_PROBE |
1095 | | ind = hash_bin(ind + d, tab); |
1096 | | d++; |
1097 | | #else |
1098 | 0 | ind = secondary_hash(ind, tab, &peterb); |
1099 | 0 | #endif |
1100 | 0 | COLLISION; |
1101 | 0 | } |
1102 | 0 | *bin_ind = ind; |
1103 | 0 | return entry_index; |
1104 | 0 | } |
1105 | | |
1106 | | /* Find an entry with KEY in table TAB. Return non-zero if we found |
1107 | | it. Set up *RECORD to the found entry record. */ |
1108 | | int |
1109 | | st_lookup(st_table *tab, st_data_t key, st_data_t *value) |
1110 | 6.76k | { |
1111 | 6.76k | st_index_t bin; |
1112 | 6.76k | st_hash_t hash = do_hash(key, tab); |
1113 | | |
1114 | 6.76k | retry: |
1115 | 6.76k | if (tab->bins == NULL) { |
1116 | 6.76k | bin = find_entry(tab, hash, key); |
1117 | 6.76k | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1118 | 0 | goto retry; |
1119 | 6.76k | if (bin == UNDEFINED_ENTRY_IND) |
1120 | 6.76k | return 0; |
1121 | 6.76k | } |
1122 | 0 | else { |
1123 | 0 | bin = find_table_entry_ind(tab, hash, key); |
1124 | 0 | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1125 | 0 | goto retry; |
1126 | 0 | if (bin == UNDEFINED_ENTRY_IND) |
1127 | 0 | return 0; |
1128 | 0 | bin -= ENTRY_BASE; |
1129 | 0 | } |
1130 | 0 | if (value != 0) |
1131 | 0 | *value = tab->entries[bin].record; |
1132 | 0 | return 1; |
1133 | 6.76k | } |
1134 | | |
1135 | | #ifdef RUBY |
1136 | | /* Find an entry with KEY in table TAB. Return non-zero if we found |
1137 | | it. Set up *RESULT to the found table entry key. */ |
1138 | | int |
1139 | | st_get_key(st_table *tab, st_data_t key, st_data_t *result) |
1140 | | { |
1141 | | st_index_t bin; |
1142 | | st_hash_t hash = do_hash(key, tab); |
1143 | | |
1144 | | retry: |
1145 | | if (tab->bins == NULL) { |
1146 | | bin = find_entry(tab, hash, key); |
1147 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1148 | | goto retry; |
1149 | | if (bin == UNDEFINED_ENTRY_IND) |
1150 | | return 0; |
1151 | | } |
1152 | | else { |
1153 | | bin = find_table_entry_ind(tab, hash, key); |
1154 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1155 | | goto retry; |
1156 | | if (bin == UNDEFINED_ENTRY_IND) |
1157 | | return 0; |
1158 | | bin -= ENTRY_BASE; |
1159 | | } |
1160 | | if (result != 0) |
1161 | | *result = tab->entries[bin].key; |
1162 | | return 1; |
1163 | | } |
1164 | | #endif /* RUBY */ |
1165 | | |
1166 | | /* Check the table and rebuild it if it is necessary. */ |
1167 | | static inline void |
1168 | | rebuild_table_if_necessary(st_table *tab) |
1169 | 9.02k | { |
1170 | 9.02k | st_index_t bound = tab->entries_bound; |
1171 | | |
1172 | 9.02k | if (bound == get_allocated_entries(tab)) |
1173 | 0 | rebuild_table(tab); |
1174 | 9.02k | st_assert(tab->entries_bound < get_allocated_entries(tab)); |
1175 | 9.02k | } |
1176 | | |
1177 | | /* Insert (KEY, VALUE) into table TAB and return zero. If there is |
1178 | | already entry with KEY in the table, return nonzero and and update |
1179 | | the value of the found entry. */ |
1180 | | int |
1181 | | st_insert(st_table *tab, st_data_t key, st_data_t value) |
1182 | 9.02k | { |
1183 | 9.02k | st_table_entry *entry; |
1184 | 9.02k | st_index_t bin; |
1185 | 9.02k | st_index_t ind; |
1186 | 9.02k | st_hash_t hash_value; |
1187 | 9.02k | st_index_t bin_ind; |
1188 | 9.02k | int new_p; |
1189 | | |
1190 | 9.02k | hash_value = do_hash(key, tab); |
1191 | 9.02k | retry: |
1192 | 9.02k | rebuild_table_if_necessary(tab); |
1193 | 9.02k | if (tab->bins == NULL) { |
1194 | 9.02k | bin = find_entry(tab, hash_value, key); |
1195 | 9.02k | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1196 | 0 | goto retry; |
1197 | 9.02k | new_p = bin == UNDEFINED_ENTRY_IND; |
1198 | 9.02k | if (new_p) |
1199 | 9.02k | tab->num_entries++; |
1200 | 9.02k | bin_ind = UNDEFINED_BIN_IND; |
1201 | 9.02k | } |
1202 | 0 | else { |
1203 | 0 | bin = find_table_bin_ptr_and_reserve(tab, &hash_value, |
1204 | 0 | key, &bin_ind); |
1205 | 0 | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1206 | 0 | goto retry; |
1207 | 0 | new_p = bin == UNDEFINED_ENTRY_IND; |
1208 | 0 | bin -= ENTRY_BASE; |
1209 | 0 | } |
1210 | 9.02k | if (new_p) { |
1211 | 9.02k | st_assert(tab->entries_bound < get_allocated_entries(tab)); |
1212 | 9.02k | ind = tab->entries_bound++; |
1213 | 9.02k | entry = &tab->entries[ind]; |
1214 | 9.02k | entry->hash = hash_value; |
1215 | 9.02k | entry->key = key; |
1216 | 9.02k | entry->record = value; |
1217 | 9.02k | if (bin_ind != UNDEFINED_BIN_IND) |
1218 | 0 | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); |
1219 | | #ifdef ST_DEBUG |
1220 | | st_check(tab); |
1221 | | #endif |
1222 | 9.02k | return 0; |
1223 | 9.02k | } |
1224 | 0 | tab->entries[bin].record = value; |
1225 | | #ifdef ST_DEBUG |
1226 | | st_check(tab); |
1227 | | #endif |
1228 | 0 | return 1; |
1229 | 9.02k | } |
1230 | | |
1231 | | #ifdef RUBY |
1232 | | /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have |
1233 | | entry with KEY before the insertion. */ |
1234 | | void |
1235 | | st_add_direct_with_hash(st_table *tab, |
1236 | | st_data_t key, st_data_t value, st_hash_t hash) |
1237 | | { |
1238 | | st_table_entry *entry; |
1239 | | st_index_t ind; |
1240 | | st_index_t bin_ind; |
1241 | | |
1242 | | rebuild_table_if_necessary(tab); |
1243 | | ind = tab->entries_bound++; |
1244 | | entry = &tab->entries[ind]; |
1245 | | entry->hash = hash; |
1246 | | entry->key = key; |
1247 | | entry->record = value; |
1248 | | tab->num_entries++; |
1249 | | if (tab->bins != NULL) { |
1250 | | bin_ind = find_table_bin_ind_direct(tab, hash, key); |
1251 | | st_assert (bin_ind != UNDEFINED_BIN_IND); |
1252 | | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); |
1253 | | } |
1254 | | #ifdef ST_DEBUG |
1255 | | st_check(tab); |
1256 | | #endif |
1257 | | } |
1258 | | |
1259 | | /* Insert (KEY, VALUE) into table TAB. The table should not have |
1260 | | entry with KEY before the insertion. */ |
1261 | | void |
1262 | | st_add_direct(st_table *tab, st_data_t key, st_data_t value) |
1263 | | { |
1264 | | st_hash_t hash_value; |
1265 | | |
1266 | | hash_value = do_hash(key, tab); |
1267 | | st_add_direct_with_hash(tab, key, value, hash_value); |
1268 | | } |
1269 | | |
1270 | | /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If |
1271 | | there is already entry with KEY in the table, return nonzero and |
1272 | | and update the value of the found entry. */ |
1273 | | int |
1274 | | st_insert2(st_table *tab, st_data_t key, st_data_t value, |
1275 | | st_data_t (*func)(st_data_t)) |
1276 | | { |
1277 | | st_table_entry *entry; |
1278 | | st_index_t bin; |
1279 | | st_index_t ind, check; |
1280 | | st_hash_t hash_value; |
1281 | | st_index_t bin_ind; |
1282 | | int new_p; |
1283 | | |
1284 | | hash_value = do_hash(key, tab); |
1285 | | retry: |
1286 | | rebuild_table_if_necessary (tab); |
1287 | | if (tab->bins == NULL) { |
1288 | | bin = find_entry(tab, hash_value, key); |
1289 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1290 | | goto retry; |
1291 | | new_p = bin == UNDEFINED_ENTRY_IND; |
1292 | | if (new_p) |
1293 | | tab->num_entries++; |
1294 | | bin_ind = UNDEFINED_BIN_IND; |
1295 | | } |
1296 | | else { |
1297 | | bin = find_table_bin_ptr_and_reserve(tab, &hash_value, |
1298 | | key, &bin_ind); |
1299 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1300 | | goto retry; |
1301 | | new_p = bin == UNDEFINED_ENTRY_IND; |
1302 | | bin -= ENTRY_BASE; |
1303 | | } |
1304 | | if (new_p) { |
1305 | | st_assert(tab->entries_bound < get_allocated_entries(tab)); |
1306 | | check = tab->rebuilds_num; |
1307 | | key = (*func)(key); |
1308 | | st_assert(check == tab->rebuilds_num); |
1309 | | ind = tab->entries_bound++; |
1310 | | entry = &tab->entries[ind]; |
1311 | | entry->hash = hash_value; |
1312 | | entry->key = key; |
1313 | | entry->record = value; |
1314 | | if (bin_ind != UNDEFINED_BIN_IND) |
1315 | | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); |
1316 | | st_assert(do_hash(key, tab) == hash_value); |
1317 | | #ifdef ST_DEBUG |
1318 | | st_check(tab); |
1319 | | #endif |
1320 | | return 0; |
1321 | | } |
1322 | | tab->entries[bin].record = value; |
1323 | | #ifdef ST_DEBUG |
1324 | | st_check(tab); |
1325 | | #endif |
1326 | | return 1; |
1327 | | } |
1328 | | |
1329 | | /* Create and return a copy of table OLD_TAB. */ |
1330 | | st_table * |
1331 | | st_copy(st_table *old_tab) |
1332 | | { |
1333 | | st_table *new_tab; |
1334 | | |
1335 | | new_tab = (st_table *) malloc(sizeof(st_table)); |
1336 | | #ifndef RUBY |
1337 | | if (new_tab == NULL) |
1338 | | return NULL; |
1339 | | #endif |
1340 | | *new_tab = *old_tab; |
1341 | | if (old_tab->bins == NULL) |
1342 | | new_tab->bins = NULL; |
1343 | | else { |
1344 | | new_tab->bins = (st_index_t *) malloc(bins_size(old_tab)); |
1345 | | #ifndef RUBY |
1346 | | if (new_tab->bins == NULL) { |
1347 | | free(new_tab); |
1348 | | return NULL; |
1349 | | } |
1350 | | #endif |
1351 | | } |
1352 | | new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab) |
1353 | | * sizeof(st_table_entry)); |
1354 | | #ifndef RUBY |
1355 | | if (new_tab->entries == NULL) { |
1356 | | st_free_table(new_tab); |
1357 | | return NULL; |
1358 | | } |
1359 | | #endif |
1360 | | MEMCPY(new_tab->entries, old_tab->entries, st_table_entry, |
1361 | | get_allocated_entries(old_tab)); |
1362 | | if (old_tab->bins != NULL) |
1363 | | MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab)); |
1364 | | #ifdef ST_DEBUG |
1365 | | st_check(new_tab); |
1366 | | #endif |
1367 | | return new_tab; |
1368 | | } |
1369 | | #endif /* RUBY */ |
1370 | | |
1371 | | /* Update the entries start of table TAB after removing an entry |
1372 | | with index N in the array entries. */ |
1373 | | static inline void |
1374 | | update_range_for_deleted(st_table *tab, st_index_t n) |
1375 | 9.02k | { |
1376 | | /* Do not update entries_bound here. Otherwise, we can fill all |
1377 | | bins by deleted entry value before rebuilding the table. */ |
1378 | 9.02k | if (tab->entries_start == n) |
1379 | 9.02k | tab->entries_start = n + 1; |
1380 | 9.02k | } |
1381 | | |
1382 | | #ifdef RUBY |
1383 | | /* Delete entry with KEY from table TAB, set up *VALUE (unless |
1384 | | VALUE is zero) from deleted table entry, and return non-zero. If |
1385 | | there is no entry with KEY in the table, clear *VALUE (unless VALUE |
1386 | | is zero), and return zero. */ |
1387 | | static int |
1388 | | st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) |
1389 | | { |
1390 | | st_table_entry *entry; |
1391 | | st_index_t bin; |
1392 | | st_index_t bin_ind; |
1393 | | st_hash_t hash; |
1394 | | |
1395 | | st_assert(tab != NULL); |
1396 | | hash = do_hash(*key, tab); |
1397 | | retry: |
1398 | | if (tab->bins == NULL) { |
1399 | | bin = find_entry(tab, hash, *key); |
1400 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1401 | | goto retry; |
1402 | | if (bin == UNDEFINED_ENTRY_IND) { |
1403 | | if (value != 0) *value = 0; |
1404 | | return 0; |
1405 | | } |
1406 | | } |
1407 | | else { |
1408 | | bin_ind = find_table_bin_ind(tab, hash, *key); |
1409 | | if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) |
1410 | | goto retry; |
1411 | | if (bin_ind == UNDEFINED_BIN_IND) { |
1412 | | if (value != 0) *value = 0; |
1413 | | return 0; |
1414 | | } |
1415 | | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; |
1416 | | MARK_BIN_DELETED(tab, bin_ind); |
1417 | | } |
1418 | | entry = &tab->entries[bin]; |
1419 | | *key = entry->key; |
1420 | | if (value != 0) *value = entry->record; |
1421 | | MARK_ENTRY_DELETED(entry); |
1422 | | tab->num_entries--; |
1423 | | update_range_for_deleted(tab, bin); |
1424 | | #ifdef ST_DEBUG |
1425 | | st_check(tab); |
1426 | | #endif |
1427 | | return 1; |
1428 | | } |
1429 | | |
1430 | | int |
1431 | | st_delete(st_table *tab, st_data_t *key, st_data_t *value) |
1432 | | { |
1433 | | return st_general_delete(tab, key, value); |
1434 | | } |
1435 | | |
1436 | | /* The function and other functions with suffix '_safe' or '_check' |
1437 | | are originated from the previous implementation of the hash tables. |
1438 | | It was necessary for correct deleting entries during traversing |
1439 | | tables. The current implementation permits deletion during |
1440 | | traversing without a specific way to do this. */ |
1441 | | int |
1442 | | st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, |
1443 | | st_data_t never ATTRIBUTE_UNUSED) |
1444 | | { |
1445 | | return st_general_delete(tab, key, value); |
1446 | | } |
1447 | | |
1448 | | /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and |
1449 | | return zero. Otherwise, remove the first entry in the table. |
1450 | | Return its key through KEY and its record through VALUE (unless |
1451 | | VALUE is zero). */ |
1452 | | int |
1453 | | st_shift(st_table *tab, st_data_t *key, st_data_t *value) |
1454 | | { |
1455 | | st_index_t i, bound; |
1456 | | st_index_t bin; |
1457 | | st_table_entry *entries, *curr_entry_ptr; |
1458 | | st_index_t bin_ind; |
1459 | | |
1460 | | entries = tab->entries; |
1461 | | bound = tab->entries_bound; |
1462 | | for (i = tab->entries_start; i < bound; i++) { |
1463 | | curr_entry_ptr = &entries[i]; |
1464 | | if (! DELETED_ENTRY_P(curr_entry_ptr)) { |
1465 | | st_hash_t entry_hash = curr_entry_ptr->hash; |
1466 | | st_data_t entry_key = curr_entry_ptr->key; |
1467 | | |
1468 | | if (value != 0) *value = curr_entry_ptr->record; |
1469 | | *key = entry_key; |
1470 | | retry: |
1471 | | if (tab->bins == NULL) { |
1472 | | bin = find_entry(tab, entry_hash, entry_key); |
1473 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) { |
1474 | | entries = tab->entries; |
1475 | | goto retry; |
1476 | | } |
1477 | | st_assert(bin != UNDEFINED_ENTRY_IND); |
1478 | | curr_entry_ptr = &entries[bin]; |
1479 | | } |
1480 | | else { |
1481 | | bin_ind = find_table_bin_ind(tab, entry_hash, entry_key); |
1482 | | if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) { |
1483 | | entries = tab->entries; |
1484 | | goto retry; |
1485 | | } |
1486 | | st_assert(bin_ind != UNDEFINED_BIN_IND); |
1487 | | curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) |
1488 | | - ENTRY_BASE]; |
1489 | | MARK_BIN_DELETED(tab, bin_ind); |
1490 | | } |
1491 | | st_assert(entry_hash != curr_entry_ptr->hash && entry_key == curr_entry_ptr->key); |
1492 | | MARK_ENTRY_DELETED(curr_entry_ptr); |
1493 | | tab->num_entries--; |
1494 | | update_range_for_deleted(tab, i); |
1495 | | #ifdef ST_DEBUG |
1496 | | st_check(tab); |
1497 | | #endif |
1498 | | return 1; |
1499 | | } |
1500 | | } |
1501 | | st_assert(tab->num_entries == 0); |
1502 | | tab->entries_start = tab->entries_bound = 0; |
1503 | | if (value != 0) *value = 0; |
1504 | | return 0; |
1505 | | } |
1506 | | |
1507 | | /* See comments for function st_delete_safe. */ |
1508 | | void |
1509 | | st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, |
1510 | | st_data_t never ATTRIBUTE_UNUSED) |
1511 | | { |
1512 | | } |
1513 | | |
1514 | | /* Find entry with KEY in table TAB, call FUNC with the key and the |
1515 | | value of the found entry, and non-zero as the 3rd argument. If the |
1516 | | entry is not found, call FUNC with KEY, and 2 zero arguments. If |
1517 | | the call returns ST_CONTINUE, the table will have an entry with key |
1518 | | and value returned by FUNC through the 1st and 2nd parameters. If |
1519 | | the call of FUNC returns ST_DELETE, the table will not have entry |
1520 | | with KEY. The function returns flag of that the entry with KEY was |
1521 | | in the table before the call. */ |
1522 | | int |
1523 | | st_update(st_table *tab, st_data_t key, |
1524 | | st_update_callback_func *func, st_data_t arg) |
1525 | | { |
1526 | | st_table_entry *entry = NULL; /* to avoid uninitialized value warning */ |
1527 | | st_index_t bin = 0; /* Ditto */ |
1528 | | st_table_entry *entries; |
1529 | | st_index_t bin_ind; |
1530 | | st_data_t value = 0, old_key; |
1531 | | st_index_t check; |
1532 | | int retval, existing; |
1533 | | st_hash_t hash = do_hash(key, tab); |
1534 | | |
1535 | | retry: |
1536 | | entries = tab->entries; |
1537 | | if (tab->bins == NULL) { |
1538 | | bin = find_entry(tab, hash, key); |
1539 | | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1540 | | goto retry; |
1541 | | existing = bin != UNDEFINED_ENTRY_IND; |
1542 | | entry = &entries[bin]; |
1543 | | bin_ind = UNDEFINED_BIN_IND; |
1544 | | } |
1545 | | else { |
1546 | | bin_ind = find_table_bin_ind(tab, hash, key); |
1547 | | if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) |
1548 | | goto retry; |
1549 | | existing = bin_ind != UNDEFINED_BIN_IND; |
1550 | | if (existing) { |
1551 | | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; |
1552 | | entry = &entries[bin]; |
1553 | | } |
1554 | | } |
1555 | | if (existing) { |
1556 | | key = entry->key; |
1557 | | value = entry->record; |
1558 | | } |
1559 | | old_key = key; |
1560 | | check = tab->rebuilds_num; |
1561 | | retval = (*func)(&key, &value, arg, existing); |
1562 | | st_assert(check == tab->rebuilds_num); |
1563 | | switch (retval) { |
1564 | | case ST_CONTINUE: |
1565 | | if (! existing) { |
1566 | | st_add_direct_with_hash(tab, key, value, hash); |
1567 | | break; |
1568 | | } |
1569 | | if (old_key != key) { |
1570 | | entry->key = key; |
1571 | | } |
1572 | | entry->record = value; |
1573 | | break; |
1574 | | case ST_DELETE: |
1575 | | if (existing) { |
1576 | | if (bin_ind != UNDEFINED_BIN_IND) |
1577 | | MARK_BIN_DELETED(tab, bin_ind); |
1578 | | MARK_ENTRY_DELETED(entry); |
1579 | | tab->num_entries--; |
1580 | | update_range_for_deleted(tab, bin); |
1581 | | #ifdef ST_DEBUG |
1582 | | st_check(tab); |
1583 | | #endif |
1584 | | } |
1585 | | break; |
1586 | | } |
1587 | | #ifdef ST_DEBUG |
1588 | | st_check(tab); |
1589 | | #endif |
1590 | | return existing; |
1591 | | } |
1592 | | #endif /* RUBY */ |
1593 | | |
1594 | | /* Traverse all entries in table TAB calling FUNC with current entry |
1595 | | key and value and zero. If the call returns ST_STOP, stop |
1596 | | traversing. If the call returns ST_DELETE, delete the current |
1597 | | entry from the table. In case of ST_CHECK or ST_CONTINUE, continue |
1598 | | traversing. The function returns zero unless an error is found. |
1599 | | CHECK_P is flag of st_foreach_check call. The behavior is a bit |
1600 | | different for ST_CHECK and when the current element is removed |
1601 | | during traversing. */ |
1602 | | static inline int |
1603 | | st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_update_callback_func *replace, st_data_t arg, |
1604 | | int check_p) |
1605 | 2.25k | { |
1606 | 2.25k | st_index_t bin; |
1607 | 2.25k | st_index_t bin_ind; |
1608 | 2.25k | st_table_entry *entries, *curr_entry_ptr; |
1609 | 2.25k | enum st_retval retval; |
1610 | 2.25k | st_index_t i, rebuilds_num; |
1611 | 2.25k | st_hash_t hash; |
1612 | 2.25k | st_data_t key; |
1613 | 2.25k | int error_p, packed_p = tab->bins == NULL; |
1614 | | |
1615 | 2.25k | st_assert(tab->entries_start <= tab->entries_bound); |
1616 | 2.25k | entries = tab->entries; |
1617 | | /* The bound can change inside the loop even without rebuilding |
1618 | | the table, e.g. by an entry inesrtion. */ |
1619 | 11.2k | for (i = tab->entries_start; i < tab->entries_bound; i++) { |
1620 | 9.02k | curr_entry_ptr = &entries[i]; |
1621 | 9.02k | if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) |
1622 | 0 | continue; |
1623 | 9.02k | key = curr_entry_ptr->key; |
1624 | 9.02k | rebuilds_num = tab->rebuilds_num; |
1625 | 9.02k | hash = curr_entry_ptr->hash; |
1626 | 9.02k | retval = (*func)(key, curr_entry_ptr->record, arg, 0); |
1627 | | |
1628 | 9.02k | if (retval == ST_REPLACE && replace) { |
1629 | 0 | st_data_t value; |
1630 | 0 | value = curr_entry_ptr->record; |
1631 | 0 | retval = (*replace)(&key, &value, arg, TRUE); |
1632 | 0 | curr_entry_ptr->key = key; |
1633 | 0 | curr_entry_ptr->record = value; |
1634 | 0 | } |
1635 | | |
1636 | 9.02k | if (rebuilds_num != tab->rebuilds_num) { |
1637 | 0 | retry: |
1638 | 0 | entries = tab->entries; |
1639 | 0 | packed_p = tab->bins == NULL; |
1640 | 0 | if (packed_p) { |
1641 | 0 | i = find_entry(tab, hash, key); |
1642 | 0 | if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) |
1643 | 0 | goto retry; |
1644 | 0 | error_p = i == UNDEFINED_ENTRY_IND; |
1645 | 0 | } |
1646 | 0 | else { |
1647 | 0 | i = find_table_entry_ind(tab, hash, key); |
1648 | 0 | if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) |
1649 | 0 | goto retry; |
1650 | 0 | error_p = i == UNDEFINED_ENTRY_IND; |
1651 | 0 | i -= ENTRY_BASE; |
1652 | 0 | } |
1653 | 0 | if (error_p && check_p) { |
1654 | | /* call func with error notice */ |
1655 | 0 | retval = (*func)(0, 0, arg, 1); |
1656 | | #ifdef ST_DEBUG |
1657 | | st_check(tab); |
1658 | | #endif |
1659 | 0 | return 1; |
1660 | 0 | } |
1661 | 0 | curr_entry_ptr = &entries[i]; |
1662 | 0 | } |
1663 | 9.02k | switch (retval) { |
1664 | 0 | case ST_REPLACE: |
1665 | 0 | break; |
1666 | 0 | case ST_CONTINUE: |
1667 | 0 | break; |
1668 | 0 | case ST_CHECK: |
1669 | 0 | if (check_p) |
1670 | 0 | break; |
1671 | 0 | case ST_STOP: |
1672 | | #ifdef ST_DEBUG |
1673 | | st_check(tab); |
1674 | | #endif |
1675 | 0 | return 0; |
1676 | 9.02k | case ST_DELETE: { |
1677 | 9.02k | st_data_t key = curr_entry_ptr->key; |
1678 | | |
1679 | 9.02k | again: |
1680 | 9.02k | if (packed_p) { |
1681 | 9.02k | bin = find_entry(tab, hash, key); |
1682 | 9.02k | if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) |
1683 | 0 | goto again; |
1684 | 9.02k | if (bin == UNDEFINED_ENTRY_IND) |
1685 | 0 | break; |
1686 | 9.02k | } |
1687 | 0 | else { |
1688 | 0 | bin_ind = find_table_bin_ind(tab, hash, key); |
1689 | 0 | if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) |
1690 | 0 | goto again; |
1691 | 0 | if (bin_ind == UNDEFINED_BIN_IND) |
1692 | 0 | break; |
1693 | 0 | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; |
1694 | 0 | MARK_BIN_DELETED(tab, bin_ind); |
1695 | 0 | } |
1696 | 9.02k | curr_entry_ptr = &entries[bin]; |
1697 | 9.02k | MARK_ENTRY_DELETED(curr_entry_ptr); |
1698 | 9.02k | tab->num_entries--; |
1699 | 9.02k | update_range_for_deleted(tab, bin); |
1700 | | #ifdef ST_DEBUG |
1701 | | st_check(tab); |
1702 | | #endif |
1703 | 9.02k | break; |
1704 | 9.02k | } |
1705 | 9.02k | } |
1706 | 9.02k | } |
1707 | | #ifdef ST_DEBUG |
1708 | | st_check(tab); |
1709 | | #endif |
1710 | 2.25k | return 0; |
1711 | 2.25k | } |
1712 | | |
1713 | | #ifdef RUBY |
1714 | | int |
1715 | | st_foreach_with_replace(st_table *tab, int (*func)(ANYARGS), st_update_callback_func *replace, st_data_t arg) |
1716 | | { |
1717 | | return st_general_foreach(tab, func, replace, arg, TRUE); |
1718 | | } |
1719 | | #endif /* RUBY */ |
1720 | | |
1721 | | int |
1722 | | st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg) |
1723 | 2.25k | { |
1724 | 2.25k | return st_general_foreach(tab, func, NULL, arg, FALSE); |
1725 | 2.25k | } |
1726 | | |
1727 | | #ifdef RUBY |
1728 | | /* See comments for function st_delete_safe. */ |
1729 | | int |
1730 | | st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg, |
1731 | | st_data_t never ATTRIBUTE_UNUSED) |
1732 | | { |
1733 | | return st_general_foreach(tab, func, NULL, arg, TRUE); |
1734 | | } |
1735 | | |
1736 | | /* Set up array KEYS by at most SIZE keys of head table TAB entries. |
1737 | | Return the number of keys set up in array KEYS. */ |
1738 | | static inline st_index_t |
1739 | | st_general_keys(st_table *tab, st_data_t *keys, st_index_t size) |
1740 | | { |
1741 | | st_index_t i, bound; |
1742 | | st_data_t key, *keys_start, *keys_end; |
1743 | | st_table_entry *curr_entry_ptr, *entries = tab->entries; |
1744 | | |
1745 | | bound = tab->entries_bound; |
1746 | | keys_start = keys; |
1747 | | keys_end = keys + size; |
1748 | | for (i = tab->entries_start; i < bound; i++) { |
1749 | | if (keys == keys_end) |
1750 | | break; |
1751 | | curr_entry_ptr = &entries[i]; |
1752 | | key = curr_entry_ptr->key; |
1753 | | if (! DELETED_ENTRY_P(curr_entry_ptr)) |
1754 | | *keys++ = key; |
1755 | | } |
1756 | | |
1757 | | return keys - keys_start; |
1758 | | } |
1759 | | |
1760 | | st_index_t |
1761 | | st_keys(st_table *tab, st_data_t *keys, st_index_t size) |
1762 | | { |
1763 | | return st_general_keys(tab, keys, size); |
1764 | | } |
1765 | | |
1766 | | /* See comments for function st_delete_safe. */ |
1767 | | st_index_t |
1768 | | st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, |
1769 | | st_data_t never ATTRIBUTE_UNUSED) |
1770 | | { |
1771 | | return st_general_keys(tab, keys, size); |
1772 | | } |
1773 | | |
1774 | | /* Set up array VALUES by at most SIZE values of head table TAB |
1775 | | entries. Return the number of values set up in array VALUES. */ |
1776 | | static inline st_index_t |
1777 | | st_general_values(st_table *tab, st_data_t *values, st_index_t size) |
1778 | | { |
1779 | | st_index_t i, bound; |
1780 | | st_data_t *values_start, *values_end; |
1781 | | st_table_entry *curr_entry_ptr, *entries = tab->entries; |
1782 | | |
1783 | | values_start = values; |
1784 | | values_end = values + size; |
1785 | | bound = tab->entries_bound; |
1786 | | st_assert(bound != 0); |
1787 | | for (i = tab->entries_start; i < bound; i++) { |
1788 | | if (values == values_end) |
1789 | | break; |
1790 | | curr_entry_ptr = &entries[i]; |
1791 | | if (! DELETED_ENTRY_P(curr_entry_ptr)) |
1792 | | *values++ = curr_entry_ptr->record; |
1793 | | } |
1794 | | |
1795 | | return values - values_start; |
1796 | | } |
1797 | | |
1798 | | st_index_t |
1799 | | st_values(st_table *tab, st_data_t *values, st_index_t size) |
1800 | | { |
1801 | | return st_general_values(tab, values, size); |
1802 | | } |
1803 | | |
1804 | | /* See comments for function st_delete_safe. */ |
1805 | | st_index_t |
1806 | | st_values_check(st_table *tab, st_data_t *values, st_index_t size, |
1807 | | st_data_t never ATTRIBUTE_UNUSED) |
1808 | | { |
1809 | | return st_general_values(tab, values, size); |
1810 | | } |
1811 | | |
1812 | | #define FNV1_32A_INIT 0x811c9dc5 |
1813 | | |
1814 | | /* |
1815 | | * 32 bit magic FNV-1a prime |
1816 | | */ |
1817 | | #define FNV_32_PRIME 0x01000193 |
1818 | | |
1819 | | #ifndef UNALIGNED_WORD_ACCESS |
1820 | | # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
1821 | | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ |
1822 | | defined(__powerpc64__) || \ |
1823 | | defined(__mc68020__) |
1824 | | # define UNALIGNED_WORD_ACCESS 1 |
1825 | | # endif |
1826 | | #endif |
1827 | | #ifndef UNALIGNED_WORD_ACCESS |
1828 | | # define UNALIGNED_WORD_ACCESS 0 |
1829 | | #endif |
1830 | | |
1831 | | /* This hash function is quite simplified MurmurHash3 |
1832 | | * Simplification is legal, cause most of magic still happens in finalizator. |
1833 | | * And finalizator is almost the same as in MurmurHash3 */ |
1834 | | #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y)) |
1835 | | #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n))) |
1836 | | |
1837 | | #if ST_INDEX_BITS <= 32 |
1838 | | #define C1 (st_index_t)0xcc9e2d51 |
1839 | | #define C2 (st_index_t)0x1b873593 |
1840 | | #else |
1841 | | #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5); |
1842 | | #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f); |
1843 | | #endif |
1844 | | NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k)); |
1845 | | NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h)); |
1846 | | NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h)); |
1847 | | |
1848 | | static inline st_index_t |
1849 | | murmur_step(st_index_t h, st_index_t k) |
1850 | | { |
1851 | | #if ST_INDEX_BITS <= 32 |
1852 | | #define r1 (17) |
1853 | | #define r2 (11) |
1854 | | #else |
1855 | | #define r1 (33) |
1856 | | #define r2 (24) |
1857 | | #endif |
1858 | | k *= C1; |
1859 | | h ^= ROTL(k, r1); |
1860 | | h *= C2; |
1861 | | h = ROTL(h, r2); |
1862 | | return h; |
1863 | | } |
1864 | | #undef r1 |
1865 | | #undef r2 |
1866 | | |
1867 | | static inline st_index_t |
1868 | | murmur_finish(st_index_t h) |
1869 | | { |
1870 | | #if ST_INDEX_BITS <= 32 |
1871 | | #define r1 (16) |
1872 | | #define r2 (13) |
1873 | | #define r3 (16) |
1874 | | const st_index_t c1 = 0x85ebca6b; |
1875 | | const st_index_t c2 = 0xc2b2ae35; |
1876 | | #else |
1877 | | /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */ |
1878 | | #define r1 (30) |
1879 | | #define r2 (27) |
1880 | | #define r3 (31) |
1881 | | const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9); |
1882 | | const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb); |
1883 | | #endif |
1884 | | #if ST_INDEX_BITS > 64 |
1885 | | h ^= h >> 64; |
1886 | | h *= c2; |
1887 | | h ^= h >> 65; |
1888 | | #endif |
1889 | | h ^= h >> r1; |
1890 | | h *= c1; |
1891 | | h ^= h >> r2; |
1892 | | h *= c2; |
1893 | | h ^= h >> r3; |
1894 | | return h; |
1895 | | } |
1896 | | #undef r1 |
1897 | | #undef r2 |
1898 | | #undef r3 |
1899 | | |
1900 | | st_index_t |
1901 | | st_hash(const void *ptr, size_t len, st_index_t h) |
1902 | | { |
1903 | | const char *data = ptr; |
1904 | | st_index_t t = 0; |
1905 | | size_t l = len; |
1906 | | |
1907 | | #define data_at(n) (st_index_t)((unsigned char)data[(n)]) |
1908 | | #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0) |
1909 | | #if SIZEOF_ST_INDEX_T > 4 |
1910 | | #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4 |
1911 | | #if SIZEOF_ST_INDEX_T > 8 |
1912 | | #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \ |
1913 | | UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8 |
1914 | | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16 |
1915 | | #endif |
1916 | | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8 |
1917 | | #else |
1918 | | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4 |
1919 | | #endif |
1920 | | #undef SKIP_TAIL |
1921 | | if (len >= sizeof(st_index_t)) { |
1922 | | #if !UNALIGNED_WORD_ACCESS |
1923 | | int align = (int)((st_data_t)data % sizeof(st_index_t)); |
1924 | | if (align) { |
1925 | | st_index_t d = 0; |
1926 | | int sl, sr, pack; |
1927 | | |
1928 | | switch (align) { |
1929 | | #ifdef WORDS_BIGENDIAN |
1930 | | # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ |
1931 | | t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2) |
1932 | | #else |
1933 | | # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ |
1934 | | t |= data_at(n) << CHAR_BIT*(n) |
1935 | | #endif |
1936 | | UNALIGNED_ADD_ALL; |
1937 | | #undef UNALIGNED_ADD |
1938 | | } |
1939 | | |
1940 | | #ifdef WORDS_BIGENDIAN |
1941 | | t >>= (CHAR_BIT * align) - CHAR_BIT; |
1942 | | #else |
1943 | | t <<= (CHAR_BIT * align); |
1944 | | #endif |
1945 | | |
1946 | | data += sizeof(st_index_t)-align; |
1947 | | len -= sizeof(st_index_t)-align; |
1948 | | |
1949 | | sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align); |
1950 | | sr = CHAR_BIT * align; |
1951 | | |
1952 | | while (len >= sizeof(st_index_t)) { |
1953 | | d = *(st_index_t *)data; |
1954 | | #ifdef WORDS_BIGENDIAN |
1955 | | t = (t << sr) | (d >> sl); |
1956 | | #else |
1957 | | t = (t >> sr) | (d << sl); |
1958 | | #endif |
1959 | | h = murmur_step(h, t); |
1960 | | t = d; |
1961 | | data += sizeof(st_index_t); |
1962 | | len -= sizeof(st_index_t); |
1963 | | } |
1964 | | |
1965 | | pack = len < (size_t)align ? (int)len : align; |
1966 | | d = 0; |
1967 | | switch (pack) { |
1968 | | #ifdef WORDS_BIGENDIAN |
1969 | | # define UNALIGNED_ADD(n) case (n) + 1: \ |
1970 | | d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) |
1971 | | #else |
1972 | | # define UNALIGNED_ADD(n) case (n) + 1: \ |
1973 | | d |= data_at(n) << CHAR_BIT*(n) |
1974 | | #endif |
1975 | | UNALIGNED_ADD_ALL; |
1976 | | #undef UNALIGNED_ADD |
1977 | | } |
1978 | | #ifdef WORDS_BIGENDIAN |
1979 | | t = (t << sr) | (d >> sl); |
1980 | | #else |
1981 | | t = (t >> sr) | (d << sl); |
1982 | | #endif |
1983 | | |
1984 | | if (len < (size_t)align) goto skip_tail; |
1985 | | # define SKIP_TAIL 1 |
1986 | | h = murmur_step(h, t); |
1987 | | data += pack; |
1988 | | len -= pack; |
1989 | | } |
1990 | | else |
1991 | | #endif |
1992 | | #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED |
1993 | | #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t)) |
1994 | | #else |
1995 | | #define aligned_data data |
1996 | | #endif |
1997 | | { |
1998 | | do { |
1999 | | h = murmur_step(h, *(st_index_t *)aligned_data); |
2000 | | data += sizeof(st_index_t); |
2001 | | len -= sizeof(st_index_t); |
2002 | | } while (len >= sizeof(st_index_t)); |
2003 | | } |
2004 | | } |
2005 | | |
2006 | | t = 0; |
2007 | | switch (len) { |
2008 | | #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8 |
2009 | | /* in this case byteorder doesn't really matter */ |
2010 | | #if SIZEOF_ST_INDEX_T > 4 |
2011 | | case 7: t |= data_at(6) << 48; |
2012 | | case 6: t |= data_at(5) << 40; |
2013 | | case 5: t |= data_at(4) << 32; |
2014 | | case 4: |
2015 | | t |= (st_index_t)*(uint32_t*)aligned_data; |
2016 | | goto skip_tail; |
2017 | | # define SKIP_TAIL 1 |
2018 | | #endif |
2019 | | case 3: t |= data_at(2) << 16; |
2020 | | case 2: t |= data_at(1) << 8; |
2021 | | case 1: t |= data_at(0); |
2022 | | #else |
2023 | | #ifdef WORDS_BIGENDIAN |
2024 | | # define UNALIGNED_ADD(n) case (n) + 1: \ |
2025 | | t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) |
2026 | | #else |
2027 | | # define UNALIGNED_ADD(n) case (n) + 1: \ |
2028 | | t |= data_at(n) << CHAR_BIT*(n) |
2029 | | #endif |
2030 | | UNALIGNED_ADD_ALL; |
2031 | | #undef UNALIGNED_ADD |
2032 | | #endif |
2033 | | #ifdef SKIP_TAIL |
2034 | | skip_tail: |
2035 | | #endif |
2036 | | h ^= t; h -= ROTL(t, 7); |
2037 | | h *= C2; |
2038 | | } |
2039 | | h ^= l; |
2040 | | #undef aligned_data |
2041 | | |
2042 | | return murmur_finish(h); |
2043 | | } |
2044 | | |
2045 | | st_index_t |
2046 | | st_hash_uint32(st_index_t h, uint32_t i) |
2047 | | { |
2048 | | return murmur_step(h, i); |
2049 | | } |
2050 | | |
2051 | | NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i)); |
2052 | | st_index_t |
2053 | | st_hash_uint(st_index_t h, st_index_t i) |
2054 | | { |
2055 | | i += h; |
2056 | | /* no matter if it is BigEndian or LittleEndian, |
2057 | | * we hash just integers */ |
2058 | | #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8 |
2059 | | h = murmur_step(h, i >> 8*8); |
2060 | | #endif |
2061 | | h = murmur_step(h, i); |
2062 | | return h; |
2063 | | } |
2064 | | |
2065 | | st_index_t |
2066 | | st_hash_end(st_index_t h) |
2067 | | { |
2068 | | h = murmur_finish(h); |
2069 | | return h; |
2070 | | } |
2071 | | |
2072 | | #undef st_hash_start |
2073 | | st_index_t |
2074 | | st_hash_start(st_index_t h) |
2075 | | { |
2076 | | return h; |
2077 | | } |
2078 | | |
2079 | | static st_index_t |
2080 | | strhash(st_data_t arg) |
2081 | | { |
2082 | | register const char *string = (const char *)arg; |
2083 | | return st_hash(string, strlen(string), FNV1_32A_INIT); |
2084 | | } |
2085 | | |
2086 | | int |
2087 | | st_locale_insensitive_strcasecmp(const char *s1, const char *s2) |
2088 | | { |
2089 | | char c1, c2; |
2090 | | |
2091 | | while (1) { |
2092 | | c1 = *s1++; |
2093 | | c2 = *s2++; |
2094 | | if (c1 == '\0' || c2 == '\0') { |
2095 | | if (c1 != '\0') return 1; |
2096 | | if (c2 != '\0') return -1; |
2097 | | return 0; |
2098 | | } |
2099 | | if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A'; |
2100 | | if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A'; |
2101 | | if (c1 != c2) { |
2102 | | if (c1 > c2) |
2103 | | return 1; |
2104 | | else |
2105 | | return -1; |
2106 | | } |
2107 | | } |
2108 | | } |
2109 | | |
2110 | | int |
2111 | | st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n) |
2112 | | { |
2113 | | char c1, c2; |
2114 | | size_t i; |
2115 | | |
2116 | | for (i = 0; i < n; i++) { |
2117 | | c1 = *s1++; |
2118 | | c2 = *s2++; |
2119 | | if (c1 == '\0' || c2 == '\0') { |
2120 | | if (c1 != '\0') return 1; |
2121 | | if (c2 != '\0') return -1; |
2122 | | return 0; |
2123 | | } |
2124 | | if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A'; |
2125 | | if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A'; |
2126 | | if (c1 != c2) { |
2127 | | if (c1 > c2) |
2128 | | return 1; |
2129 | | else |
2130 | | return -1; |
2131 | | } |
2132 | | } |
2133 | | return 0; |
2134 | | } |
2135 | | |
2136 | | NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t))); |
2137 | | static st_index_t |
2138 | | strcasehash(st_data_t arg) |
2139 | | { |
2140 | | register const char *string = (const char *)arg; |
2141 | | register st_index_t hval = FNV1_32A_INIT; |
2142 | | |
2143 | | /* |
2144 | | * FNV-1a hash each octet in the buffer |
2145 | | */ |
2146 | | while (*string) { |
2147 | | unsigned int c = (unsigned char)*string++; |
2148 | | if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A'; |
2149 | | hval ^= c; |
2150 | | |
2151 | | /* multiply by the 32 bit FNV magic prime mod 2^32 */ |
2152 | | hval *= FNV_32_PRIME; |
2153 | | } |
2154 | | return hval; |
2155 | | } |
2156 | | |
2157 | | int |
2158 | | st_numcmp(st_data_t x, st_data_t y) |
2159 | | { |
2160 | | return x != y; |
2161 | | } |
2162 | | |
2163 | | st_index_t |
2164 | | st_numhash(st_data_t n) |
2165 | | { |
2166 | | enum {s1 = 11, s2 = 3}; |
2167 | | return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2)); |
2168 | | } |
2169 | | |
2170 | | /* Expand TAB to be suitable for holding SIZ entries in total. |
2171 | | Pre-existing entries remain not deleted inside of TAB, but its bins |
2172 | | are cleared to expect future reconstruction. See rehash below. */ |
2173 | | static void |
2174 | | st_expand_table(st_table *tab, st_index_t siz) |
2175 | | { |
2176 | | st_table *tmp; |
2177 | | st_index_t n; |
2178 | | |
2179 | | if (siz <= get_allocated_entries(tab)) |
2180 | | return; /* enough room already */ |
2181 | | |
2182 | | tmp = st_init_table_with_size(tab->type, siz); |
2183 | | n = get_allocated_entries(tab); |
2184 | | MEMCPY(tmp->entries, tab->entries, st_table_entry, n); |
2185 | | free(tab->entries); |
2186 | | if (tab->bins != NULL) |
2187 | | free(tab->bins); |
2188 | | if (tmp->bins != NULL) |
2189 | | free(tmp->bins); |
2190 | | tab->entry_power = tmp->entry_power; |
2191 | | tab->bin_power = tmp->bin_power; |
2192 | | tab->size_ind = tmp->size_ind; |
2193 | | tab->entries = tmp->entries; |
2194 | | tab->bins = NULL; |
2195 | | tab->rebuilds_num++; |
2196 | | free(tmp); |
2197 | | } |
2198 | | |
2199 | | /* Rehash using linear search. Return TRUE if we found that the table |
2200 | | was rebuilt. */ |
2201 | | static int |
2202 | | st_rehash_linear(st_table *tab) |
2203 | | { |
2204 | | int eq_p, rebuilt_p; |
2205 | | st_index_t i, j; |
2206 | | st_table_entry *p, *q; |
2207 | | if (tab->bins) { |
2208 | | free(tab->bins); |
2209 | | tab->bins = NULL; |
2210 | | } |
2211 | | for (i = tab->entries_start; i < tab->entries_bound; i++) { |
2212 | | p = &tab->entries[i]; |
2213 | | if (DELETED_ENTRY_P(p)) |
2214 | | continue; |
2215 | | for (j = i + 1; j < tab->entries_bound; j++) { |
2216 | | q = &tab->entries[j]; |
2217 | | if (DELETED_ENTRY_P(q)) |
2218 | | continue; |
2219 | | DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p); |
2220 | | if (EXPECT(rebuilt_p, 0)) |
2221 | | return TRUE; |
2222 | | if (eq_p) { |
2223 | | st_assert(p < q); |
2224 | | *p = *q; |
2225 | | MARK_ENTRY_DELETED(q); |
2226 | | tab->num_entries--; |
2227 | | update_range_for_deleted(tab, j); |
2228 | | } |
2229 | | } |
2230 | | } |
2231 | | return FALSE; |
2232 | | } |
2233 | | |
2234 | | /* Rehash using index. Return TRUE if we found that the table was |
2235 | | rebuilt. */ |
2236 | | static int |
2237 | | st_rehash_indexed(st_table *tab) |
2238 | | { |
2239 | | int eq_p, rebuilt_p; |
2240 | | st_index_t i; |
2241 | | st_index_t const n = bins_size(tab); |
2242 | | unsigned int const size_ind = get_size_ind(tab); |
2243 | | st_index_t *bins = realloc(tab->bins, n); |
2244 | | st_assert(bins != NULL); |
2245 | | tab->bins = bins; |
2246 | | initialize_bins(tab); |
2247 | | for (i = tab->entries_start; i < tab->entries_bound; i++) { |
2248 | | st_table_entry *p = &tab->entries[i]; |
2249 | | st_index_t ind; |
2250 | | #ifdef QUADRATIC_PROBE |
2251 | | st_index_t d = 1; |
2252 | | #else |
2253 | | st_index_t peterb = p->hash; |
2254 | | #endif |
2255 | | |
2256 | | if (DELETED_ENTRY_P(p)) |
2257 | | continue; |
2258 | | |
2259 | | ind = hash_bin(p->hash, tab); |
2260 | | for(;;) { |
2261 | | st_index_t bin = get_bin(bins, size_ind, ind); |
2262 | | if (EMPTY_OR_DELETED_BIN_P(bin)) { |
2263 | | /* ok, new room */ |
2264 | | set_bin(bins, size_ind, ind, i + ENTRY_BASE); |
2265 | | break; |
2266 | | } |
2267 | | else { |
2268 | | st_table_entry *q = &tab->entries[bin - ENTRY_BASE]; |
2269 | | DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p); |
2270 | | if (EXPECT(rebuilt_p, 0)) |
2271 | | return TRUE; |
2272 | | if (eq_p) { |
2273 | | /* duplicated key; delete it */ |
2274 | | st_assert(q < p); |
2275 | | q->record = p->record; |
2276 | | MARK_ENTRY_DELETED(p); |
2277 | | tab->num_entries--; |
2278 | | update_range_for_deleted(tab, bin); |
2279 | | break; |
2280 | | } |
2281 | | else { |
2282 | | /* hash collision; skip it */ |
2283 | | #ifdef QUADRATIC_PROBE |
2284 | | ind = hash_bin(ind + d, tab); |
2285 | | d++; |
2286 | | #else |
2287 | | ind = secondary_hash(ind, tab, &peterb); |
2288 | | #endif |
2289 | | } |
2290 | | } |
2291 | | } |
2292 | | } |
2293 | | return FALSE; |
2294 | | } |
2295 | | |
2296 | | /* Reconstruct TAB's bins according to TAB's entries. This function |
2297 | | permits conflicting keys inside of entries. No errors are reported |
2298 | | then. All but one of them are discarded silently. */ |
2299 | | static void |
2300 | | st_rehash(st_table *tab) |
2301 | | { |
2302 | | int rebuilt_p; |
2303 | | |
2304 | | do { |
2305 | | if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) |
2306 | | rebuilt_p = st_rehash_linear(tab); |
2307 | | else |
2308 | | rebuilt_p = st_rehash_indexed(tab); |
2309 | | } while (rebuilt_p); |
2310 | | } |
2311 | | |
2312 | | static st_data_t |
2313 | | st_stringify(VALUE key) |
2314 | | { |
2315 | | return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ? |
2316 | | rb_hash_key_str(key) : key; |
2317 | | } |
2318 | | |
2319 | | static void |
2320 | | st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val) |
2321 | | { |
2322 | | st_data_t k = st_stringify(key); |
2323 | | st_table_entry e; |
2324 | | e.hash = do_hash(k, tab); |
2325 | | e.key = k; |
2326 | | e.record = val; |
2327 | | |
2328 | | tab->entries[tab->entries_bound++] = e; |
2329 | | tab->num_entries++; |
2330 | | RB_OBJ_WRITTEN(hash, Qundef, k); |
2331 | | RB_OBJ_WRITTEN(hash, Qundef, val); |
2332 | | } |
2333 | | |
2334 | | static void |
2335 | | st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash) |
2336 | | { |
2337 | | long i; |
2338 | | |
2339 | | for (i = 0; i < argc; /* */) { |
2340 | | st_data_t k = st_stringify(argv[i++]); |
2341 | | st_data_t v = argv[i++]; |
2342 | | st_insert(tab, k, v); |
2343 | | RB_OBJ_WRITTEN(hash, Qundef, k); |
2344 | | RB_OBJ_WRITTEN(hash, Qundef, v); |
2345 | | } |
2346 | | } |
2347 | | |
2348 | | static void |
2349 | | st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash) |
2350 | | { |
2351 | | long i; |
2352 | | |
2353 | | /* push elems */ |
2354 | | for (i = 0; i < argc; /* */) { |
2355 | | VALUE key = argv[i++]; |
2356 | | VALUE val = argv[i++]; |
2357 | | st_insert_single(tab, hash, key, val); |
2358 | | } |
2359 | | |
2360 | | /* reindex */ |
2361 | | st_rehash(tab); |
2362 | | } |
2363 | | |
2364 | | /* Mimics ruby's { foo => bar } syntax. This function is subpart |
2365 | | of rb_hash_bulk_insert. */ |
2366 | | void |
2367 | | rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash) |
2368 | | { |
2369 | | st_index_t n, size = argc / 2; |
2370 | | st_table *tab = RHASH_ST_TABLE(hash); |
2371 | | |
2372 | | tab = RHASH_TBL_RAW(hash); |
2373 | | n = tab->entries_bound + size; |
2374 | | st_expand_table(tab, n); |
2375 | | if (UNLIKELY(tab->num_entries)) |
2376 | | st_insert_generic(tab, argc, argv, hash); |
2377 | | else if (argc <= 2) |
2378 | | st_insert_single(tab, hash, argv[0], argv[1]); |
2379 | | else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) |
2380 | | st_insert_linear(tab, argc, argv, hash); |
2381 | | else |
2382 | | st_insert_generic(tab, argc, argv, hash); |
2383 | | } |
2384 | | #endif /* RUBY */ |