Coverage Report

Created: 2023-03-26 06:05

/src/fluent-bit/lib/onigmo/st.c
Line
Count
Source (jump to first uncovered line)
1
/* This is a public domain general purpose hash table package
2
   originally written by Peter Moore @ UCB.
3
4
   The hash table data structures were redesigned and the package was
5
   rewritten by Vladimir Makarov <vmakarov@redhat.com>.  */
6
7
/* The original package implemented classic bucket-based hash tables
8
   with entries doubly linked for an access by their insertion order.
9
   To decrease pointer chasing and as a consequence to improve a data
10
   locality the current implementation is based on storing entries in
11
   an array and using hash tables with open addressing.  The current
12
   entries are more compact in comparison with the original ones and
13
   this also improves the data locality.
14
15
   The hash table has two arrays called *bins* and *entries*.
16
17
     bins:
18
    -------
19
   |       |                  entries array:
20
   |-------|            --------------------------------
21
   | index |           |      | entry:  |        |      |
22
   |-------|           |      |         |        |      |
23
   | ...   |           | ...  | hash    |  ...   | ...  |
24
   |-------|           |      | key     |        |      |
25
   | empty |           |      | record  |        |      |
26
   |-------|            --------------------------------
27
   | ...   |                   ^                  ^
28
   |-------|                   |_ entries start   |_ entries bound
29
   |deleted|
30
    -------
31
32
   o The entry array contains table entries in the same order as they
33
     were inserted.
34
35
     When the first entry is deleted, a variable containing index of
36
     the current first entry (*entries start*) is changed.  In all
37
     other cases of the deletion, we just mark the entry as deleted by
38
     using a reserved hash value.
39
40
     Such organization of the entry storage makes operations of the
41
     table shift and the entries traversal very fast.
42
43
   o The bins provide access to the entries by their keys.  The
44
     key hash is mapped to a bin containing *index* of the
45
     corresponding entry in the entry array.
46
47
     The bin array size is always power of two, it makes mapping very
48
     fast by using the corresponding lower bits of the hash.
49
     Generally it is not a good idea to ignore some part of the hash.
50
     But alternative approach is worse.  For example, we could use a
51
     modulo operation for mapping and a prime number for the size of
52
     the bin array.  Unfortunately, the modulo operation for big
53
     64-bit numbers are extremely slow (it takes more than 100 cycles
54
     on modern Intel CPUs).
55
56
     Still other bits of the hash value are used when the mapping
57
     results in a collision.  In this case we use a secondary hash
58
     value which is a result of a function of the collision bin
59
     index and the original hash value.  The function choice
60
     guarantees that we can traverse all bins and finally find the
61
     corresponding bin as after several iterations the function
62
     becomes a full cycle linear congruential generator because it
63
     satisfies requirements of the Hull-Dobell theorem.
64
65
     When an entry is removed from the table besides marking the
66
     hash in the corresponding entry described above, we also mark
67
     the bin by a special value in order to find entries which had
68
     a collision with the removed entries.
69
70
     There are two reserved values for the bins.  One denotes an
71
     empty bin, another one denotes a bin for a deleted entry.
72
73
   o The length of the bin array is at least two times more than the
74
     entry array length.  This keeps the table load factor healthy.
75
     The trigger of rebuilding the table is always a case when we can
76
     not insert an entry anymore at the entries bound.  We could
77
     change the entries bound too in case of deletion but than we need
78
     a special code to count bins with corresponding deleted entries
79
     and reset the bin values when there are too many bins
80
     corresponding deleted entries
81
82
     Table rebuilding is done by creation of a new entry array and
83
     bins of an appropriate size.  We also try to reuse the arrays
84
     in some cases by compacting the array and removing deleted
85
     entries.
86
87
   o To save memory very small tables have no allocated arrays
88
     bins.  We use a linear search for an access by a key.
89
90
   o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91
     bins depending on the current hash table size.
92
93
   o The implementation takes into account that the table can be
94
     rebuilt during hashing or comparison functions.  It can happen if
95
     the functions are implemented in Ruby and a thread switch occurs
96
     during their execution.
97
98
   This implementation speeds up the Ruby hash table benchmarks in
99
   average by more 40% on Intel Haswell CPU.
100
101
*/
102
103
#ifdef RUBY
104
#include "internal.h"
105
#else
106
#include "regint.h"
107
#include "st.h"
108
#endif
109
110
#include <stdio.h>
111
#include <stdlib.h>
112
#include <string.h>
113
#include <assert.h>
114
115
#ifdef __GNUC__
116
0
#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
117
16.5k
#define EXPECT(expr, val) __builtin_expect(expr, val)
118
#define ATTRIBUTE_UNUSED  __attribute__((unused))
119
#else
120
#define PREFETCH(addr, write_p)
121
#define EXPECT(expr, val) (expr)
122
#define ATTRIBUTE_UNUSED
123
#endif
124
125
#ifdef ST_DEBUG
126
#define st_assert assert
127
#else
128
4.80k
#define st_assert(cond) ((void)(0 && (cond)))
129
#endif
130
131
/* The type of hashes.  */
132
typedef st_index_t st_hash_t;
133
134
struct st_table_entry {
135
    st_hash_t hash;
136
    st_data_t key;
137
    st_data_t record;
138
};
139
140
#ifdef RUBY
141
#define type_numhash st_hashtype_num
142
static const struct st_hash_type st_hashtype_num = {
143
    st_numcmp,
144
    st_numhash,
145
};
146
147
/* extern int strcmp(const char *, const char *); */
148
static st_index_t strhash(st_data_t);
149
static const struct st_hash_type type_strhash = {
150
    strcmp,
151
    strhash,
152
};
153
154
static st_index_t strcasehash(st_data_t);
155
static const struct st_hash_type type_strcasehash = {
156
    st_locale_insensitive_strcasecmp,
157
    strcasehash,
158
};
159
#endif /* RUBY */
160
161
/* Value used to catch uninitialized entries/bins during debugging.
162
   There is a possibility for a false alarm, but its probability is
163
   extremely small.  */
164
#define ST_INIT_VAL 0xafafafafafafafaf
165
#define ST_INIT_VAL_BYTE 0xafa
166
167
#ifdef RUBY
168
#undef malloc
169
#undef realloc
170
#undef calloc
171
#undef free
172
#define malloc ruby_xmalloc
173
#define calloc ruby_xcalloc
174
#define realloc ruby_xrealloc
175
#define free ruby_xfree
176
#else /* RUBY */
177
#define MEMCPY(p1,p2,type,n)  memcpy((p1), (p2), sizeof(type)*(n))
178
#endif /* RUBY */
179
180
2.13k
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
181
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
182
8.54k
    ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
183
184
/* As PRT_EQUAL only its result is returned in RES.  REBUILT_P is set
185
   up to TRUE if the table is rebuilt during the comparison.  */
186
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
187
8.54k
    do {                 \
188
8.54k
  unsigned int _old_rebuilds_num = (tab)->rebuilds_num;       \
189
8.54k
  res = PTR_EQUAL(tab, ptr, hash_val, key);        \
190
8.54k
  rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num;     \
191
8.54k
    } while (FALSE)
192
193
/* Features of a table.  */
194
struct st_features {
195
    /* Power of 2 used for number of allocated entries.  */
196
    unsigned char entry_power;
197
    /* Power of 2 used for number of allocated bins.  Depending on the
198
       table size, the number of bins is 2-4 times more than the
199
       number of entries.  */
200
    unsigned char bin_power;
201
    /* Enumeration of sizes of bins (8-bit, 16-bit etc).  */
202
    unsigned char size_ind;
203
    /* Bins are packed in words of type st_index_t.  The following is
204
       a size of bins counted by words.  */
205
    st_index_t bins_words;
206
};
207
208
/* Features of all possible size tables.  */
209
#if SIZEOF_ST_INDEX_T == 8
210
534
#define MAX_POWER2 62
211
static const struct st_features features[] = {
212
    {0, 1, 0, 0x0},
213
    {1, 2, 0, 0x1},
214
    {2, 3, 0, 0x1},
215
    {3, 4, 0, 0x2},
216
    {4, 5, 0, 0x4},
217
    {5, 6, 0, 0x8},
218
    {6, 7, 0, 0x10},
219
    {7, 8, 0, 0x20},
220
    {8, 9, 1, 0x80},
221
    {9, 10, 1, 0x100},
222
    {10, 11, 1, 0x200},
223
    {11, 12, 1, 0x400},
224
    {12, 13, 1, 0x800},
225
    {13, 14, 1, 0x1000},
226
    {14, 15, 1, 0x2000},
227
    {15, 16, 1, 0x4000},
228
    {16, 17, 2, 0x10000},
229
    {17, 18, 2, 0x20000},
230
    {18, 19, 2, 0x40000},
231
    {19, 20, 2, 0x80000},
232
    {20, 21, 2, 0x100000},
233
    {21, 22, 2, 0x200000},
234
    {22, 23, 2, 0x400000},
235
    {23, 24, 2, 0x800000},
236
    {24, 25, 2, 0x1000000},
237
    {25, 26, 2, 0x2000000},
238
    {26, 27, 2, 0x4000000},
239
    {27, 28, 2, 0x8000000},
240
    {28, 29, 2, 0x10000000},
241
    {29, 30, 2, 0x20000000},
242
    {30, 31, 2, 0x40000000},
243
    {31, 32, 2, 0x80000000},
244
    {32, 33, 3, 0x200000000},
245
    {33, 34, 3, 0x400000000},
246
    {34, 35, 3, 0x800000000},
247
    {35, 36, 3, 0x1000000000},
248
    {36, 37, 3, 0x2000000000},
249
    {37, 38, 3, 0x4000000000},
250
    {38, 39, 3, 0x8000000000},
251
    {39, 40, 3, 0x10000000000},
252
    {40, 41, 3, 0x20000000000},
253
    {41, 42, 3, 0x40000000000},
254
    {42, 43, 3, 0x80000000000},
255
    {43, 44, 3, 0x100000000000},
256
    {44, 45, 3, 0x200000000000},
257
    {45, 46, 3, 0x400000000000},
258
    {46, 47, 3, 0x800000000000},
259
    {47, 48, 3, 0x1000000000000},
260
    {48, 49, 3, 0x2000000000000},
261
    {49, 50, 3, 0x4000000000000},
262
    {50, 51, 3, 0x8000000000000},
263
    {51, 52, 3, 0x10000000000000},
264
    {52, 53, 3, 0x20000000000000},
265
    {53, 54, 3, 0x40000000000000},
266
    {54, 55, 3, 0x80000000000000},
267
    {55, 56, 3, 0x100000000000000},
268
    {56, 57, 3, 0x200000000000000},
269
    {57, 58, 3, 0x400000000000000},
270
    {58, 59, 3, 0x800000000000000},
271
    {59, 60, 3, 0x1000000000000000},
272
    {60, 61, 3, 0x2000000000000000},
273
    {61, 62, 3, 0x4000000000000000},
274
    {62, 63, 3, 0x8000000000000000},
275
};
276
277
#else
278
#define MAX_POWER2 30
279
280
static const struct st_features features[] = {
281
    {0, 1, 0, 0x1},
282
    {1, 2, 0, 0x1},
283
    {2, 3, 0, 0x2},
284
    {3, 4, 0, 0x4},
285
    {4, 5, 0, 0x8},
286
    {5, 6, 0, 0x10},
287
    {6, 7, 0, 0x20},
288
    {7, 8, 0, 0x40},
289
    {8, 9, 1, 0x100},
290
    {9, 10, 1, 0x200},
291
    {10, 11, 1, 0x400},
292
    {11, 12, 1, 0x800},
293
    {12, 13, 1, 0x1000},
294
    {13, 14, 1, 0x2000},
295
    {14, 15, 1, 0x4000},
296
    {15, 16, 1, 0x8000},
297
    {16, 17, 2, 0x20000},
298
    {17, 18, 2, 0x40000},
299
    {18, 19, 2, 0x80000},
300
    {19, 20, 2, 0x100000},
301
    {20, 21, 2, 0x200000},
302
    {21, 22, 2, 0x400000},
303
    {22, 23, 2, 0x800000},
304
    {23, 24, 2, 0x1000000},
305
    {24, 25, 2, 0x2000000},
306
    {25, 26, 2, 0x4000000},
307
    {26, 27, 2, 0x8000000},
308
    {27, 28, 2, 0x10000000},
309
    {28, 29, 2, 0x20000000},
310
    {29, 30, 2, 0x40000000},
311
    {30, 31, 2, 0x80000000},
312
};
313
314
#endif
315
316
/* The reserved hash value and its substitution.  */
317
5.87k
#define RESERVED_HASH_VAL (~(st_hash_t) 0)
318
0
#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
319
320
const st_hash_t st_reserved_hash_val = RESERVED_HASH_VAL;
321
const st_hash_t st_reserved_hash_substitution_val = RESERVED_HASH_SUBSTITUTION_VAL;
322
323
/* Return hash value of KEY for table TAB.  */
324
static inline st_hash_t
325
do_hash(st_data_t key, st_table *tab)
326
3.73k
{
327
3.73k
    st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
328
329
    /* RESERVED_HASH_VAL is used for a deleted entry.  Map it into
330
       another value.  Such mapping should be extremely rare.  */
331
3.73k
    return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
332
3.73k
}
333
334
/* Power of 2 defining the minimal number of allocated entries.  */
335
534
#define MINIMAL_POWER2 2
336
337
#if MINIMAL_POWER2 < 2
338
#error "MINIMAL_POWER2 should be >= 2"
339
#endif
340
341
/* If the power2 of the allocated `entries` is less than the following
342
   value, don't allocate bins and use a linear search.  */
343
534
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
344
345
/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE.  */
346
static int
347
get_power2(st_index_t size)
348
534
{
349
534
    unsigned int n;
350
351
2.13k
    for (n = 0; size != 0; n++)
352
1.60k
        size >>= 1;
353
534
    if (n <= MAX_POWER2)
354
534
        return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
355
#ifdef RUBY
356
    /* Ran out of the table entries */
357
    rb_raise(rb_eRuntimeError, "st_table too big");
358
#endif
359
    /* should raise exception */
360
0
    return -1;
361
534
}
362
363
/* Return value of N-th bin in array BINS of table with bins size
364
   index S.  */
365
static inline st_index_t
366
get_bin(st_index_t *bins, int s, st_index_t n)
367
0
{
368
0
    return (s == 0 ? ((unsigned char *) bins)[n]
369
0
      : s == 1 ? ((unsigned short *) bins)[n]
370
0
      : s == 2 ? ((unsigned int *) bins)[n]
371
0
      : ((st_index_t *) bins)[n]);
372
0
}
373
374
/* Set up N-th bin in array BINS of table with bins size index S to
375
   value V.  */
376
static inline void
377
set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
378
0
{
379
0
    if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
380
0
    else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
381
0
    else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
382
0
    else ((st_index_t *) bins)[n] = v;
383
0
}
384
385
/* These macros define reserved values for empty table bin and table
386
   bin which contains a deleted entry.  We will never use such values
387
   for an entry index in bins.  */
388
0
#define EMPTY_BIN    0
389
0
#define DELETED_BIN  1
390
/* Base of a real entry index in the bins.  */
391
0
#define ENTRY_BASE 2
392
393
/* Mark I-th bin of table TAB as empty, in other words not
394
   corresponding to any entry.  */
395
0
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
396
397
/* Values used for not found entry and bin with given
398
   characteristics.  */
399
9.61k
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
400
4.27k
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
401
402
/* Entry and bin values returned when we found a table rebuild during
403
   the search.  */
404
0
#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
405
0
#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
406
407
/* Mark I-th bin of table TAB as corresponding to a deleted table
408
   entry.  Update number of entries in the table and number of bins
409
   corresponding to deleted entries. */
410
#define MARK_BIN_DELETED(tab, i)        \
411
0
    do {                                                        \
412
0
        st_assert(i != UNDEFINED_BIN_IND);     \
413
0
  st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i));   \
414
0
        set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
415
0
    } while (0)
416
417
/* Macros to check that value B is used empty bins and bins
418
   corresponding deleted entries.  */
419
0
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
420
0
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
421
0
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
422
423
/* Macros to check empty bins and bins corresponding to deleted
424
   entries.  Bins are given by their index I in table TAB.  */
425
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
426
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
427
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
428
429
/* Macros for marking and checking deleted entries given by their
430
   pointer E_PTR.  */
431
2.13k
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
432
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
433
434
/* Return bin size index of table TAB.  */
435
static inline unsigned int
436
get_size_ind(const st_table *tab)
437
0
{
438
0
    return tab->size_ind;
439
0
}
440
441
/* Return the number of allocated bins of table TAB.  */
442
static inline st_index_t
443
get_bins_num(const st_table *tab)
444
0
{
445
0
    return ((st_index_t) 1)<<tab->bin_power;
446
0
}
447
448
/* Return mask for a bin index in table TAB.  */
449
static inline st_index_t
450
bins_mask(const st_table *tab)
451
0
{
452
0
    return get_bins_num(tab) - 1;
453
0
}
454
455
/* Return the index of table TAB bin corresponding to
456
   HASH_VALUE.  */
457
static inline st_index_t
458
hash_bin(st_hash_t hash_value, st_table *tab)
459
0
{
460
0
    return hash_value & bins_mask(tab);
461
0
}
462
463
/* Return the number of allocated entries of table TAB.  */
464
static inline st_index_t
465
get_allocated_entries(const st_table *tab)
466
2.67k
{
467
2.67k
    return ((st_index_t) 1)<<tab->entry_power;
468
2.67k
}
469
470
/* Return size of the allocated bins of table TAB.  */
471
static inline st_index_t
472
bins_size(const st_table *tab)
473
0
{
474
0
    return features[tab->entry_power].bins_words * sizeof (st_index_t);
475
0
}
476
477
/* Mark all bins of table TAB as empty.  */
478
static void
479
initialize_bins(st_table *tab)
480
0
{
481
0
    memset(tab->bins, 0, bins_size(tab));
482
0
}
483
484
/* Make table TAB empty.  */
485
static void
486
make_tab_empty(st_table *tab)
487
534
{
488
534
    tab->num_entries = 0;
489
534
    tab->entries_start = tab->entries_bound = 0;
490
534
    if (tab->bins != NULL)
491
0
        initialize_bins(tab);
492
534
}
493
494
#ifdef ST_DEBUG
495
#define st_assert_notinitial(ent) \
496
    do { \
497
  st_assert(ent.hash != (st_hash_t) ST_INIT_VAL);  \
498
  st_assert(ent.key != ST_INIT_VAL); \
499
  st_assert(ent.record != ST_INIT_VAL); \
500
    } while (0)
501
/* Check the table T consistency.  It can be extremely slow.  So use
502
   it only for debugging.  */
503
static void
504
st_check(st_table *tab)
505
{
506
    st_index_t d, e, i, n, p;
507
508
    for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
509
        ;
510
    p = i;
511
    st_assert(p >= MINIMAL_POWER2);
512
    st_assert(tab->entries_bound <= get_allocated_entries(tab));
513
    st_assert(tab->entries_start <= tab->entries_bound);
514
    n = 0;
515
    return;
516
    if (tab->entries_bound != 0)
517
        for (i = tab->entries_start; i < tab->entries_bound; i++) {
518
      st_assert_notinitial(tab->entries[i]);
519
      if (! DELETED_ENTRY_P(&tab->entries[i]))
520
          n++;
521
  }
522
    st_assert(n == tab->num_entries);
523
    if (tab->bins == NULL)
524
        st_assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
525
    else {
526
        st_assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
527
  for (n = d = i = 0; i < get_bins_num(tab); i++) {
528
      st_assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
529
      if (IND_DELETED_BIN_P(tab, i)) {
530
          d++;
531
    continue;
532
      }
533
      else if (IND_EMPTY_BIN_P(tab, i))
534
          continue;
535
      n++;
536
      e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
537
      st_assert(tab->entries_start <= e && e < tab->entries_bound);
538
      st_assert(! DELETED_ENTRY_P(&tab->entries[e]));
539
      st_assert_notinitial(tab->entries[e]);
540
  }
541
  st_assert(n == tab->num_entries);
542
  st_assert(n + d < get_bins_num(tab));
543
    }
544
}
545
#endif
546
547
#ifdef HASH_LOG
548
#ifdef HAVE_UNISTD_H
549
#include <unistd.h>
550
#endif
551
static struct {
552
    int all, total, num, str, strcase;
553
}  collision;
554
555
/* Flag switching off output of package statistics at the end of
556
   program.  */
557
static int init_st = 0;
558
559
/* Output overall number of table searches and collisions into a
560
   temporary file.  */
561
static void
562
stat_col(void)
563
{
564
    char fname[10+sizeof(long)*3];
565
    FILE *f;
566
    if (!collision.total) return;
567
    f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
568
    if (f == NULL)
569
        return;
570
    fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
571
            ((double)collision.all / (collision.total)) * 100);
572
    fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
573
    fclose(f);
574
}
575
#endif
576
577
/* Create and return table with TYPE which can hold at least SIZE
578
   entries.  The real number of entries which the table can hold is
579
   the nearest power of two for SIZE.  */
580
st_table *
581
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
582
534
{
583
534
    st_table *tab;
584
534
    int n;
585
586
#ifdef HASH_LOG
587
#if HASH_LOG+0 < 0
588
    {
589
        const char *e = getenv("ST_HASH_LOG");
590
        if (!e || !*e) init_st = 1;
591
    }
592
#endif
593
    if (init_st == 0) {
594
        init_st = 1;
595
        atexit(stat_col);
596
    }
597
#endif
598
599
534
    n = get_power2(size);
600
534
#ifndef RUBY
601
534
    if (n < 0)
602
0
        return NULL;
603
534
#endif
604
534
    tab = (st_table *) malloc(sizeof (st_table));
605
534
#ifndef RUBY
606
534
    if (tab == NULL)
607
0
        return NULL;
608
534
#endif
609
534
    tab->type = type;
610
534
    tab->entry_power = n;
611
534
    tab->bin_power = features[n].bin_power;
612
534
    tab->size_ind = features[n].size_ind;
613
534
    if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
614
534
        tab->bins = NULL;
615
0
    else {
616
0
        tab->bins = (st_index_t *) malloc(bins_size(tab));
617
0
#ifndef RUBY
618
0
        if (tab->bins == NULL) {
619
0
            free(tab);
620
0
            return NULL;
621
0
        }
622
0
#endif
623
0
    }
624
534
    tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
625
534
               * sizeof(st_table_entry));
626
534
#ifndef RUBY
627
534
    if (tab->entries == NULL) {
628
0
        st_free_table(tab);
629
0
        return NULL;
630
0
    }
631
534
#endif
632
#ifdef ST_DEBUG
633
    memset(tab->entries, ST_INIT_VAL_BYTE,
634
     get_allocated_entries(tab) * sizeof(st_table_entry));
635
    if (tab->bins != NULL)
636
        memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
637
#endif
638
534
    make_tab_empty(tab);
639
534
    tab->rebuilds_num = 0;
640
#ifdef ST_DEBUG
641
    st_check(tab);
642
#endif
643
534
    return tab;
644
534
}
645
646
#ifdef RUBY
647
/* Create and return table with TYPE which can hold a minimal number
648
   of entries (see comments for get_power2).  */
649
st_table *
650
st_init_table(const struct st_hash_type *type)
651
{
652
    return st_init_table_with_size(type, 0);
653
}
654
655
/* Create and return table which can hold a minimal number of
656
   numbers.  */
657
st_table *
658
st_init_numtable(void)
659
{
660
    return st_init_table(&type_numhash);
661
}
662
663
/* Create and return table which can hold SIZE numbers.  */
664
st_table *
665
st_init_numtable_with_size(st_index_t size)
666
{
667
    return st_init_table_with_size(&type_numhash, size);
668
}
669
670
/* Create and return table which can hold a minimal number of
671
   strings.  */
672
st_table *
673
st_init_strtable(void)
674
{
675
    return st_init_table(&type_strhash);
676
}
677
678
/* Create and return table which can hold SIZE strings.  */
679
st_table *
680
st_init_strtable_with_size(st_index_t size)
681
{
682
    return st_init_table_with_size(&type_strhash, size);
683
}
684
685
/* Create and return table which can hold a minimal number of strings
686
   whose character case is ignored.  */
687
st_table *
688
st_init_strcasetable(void)
689
{
690
    return st_init_table(&type_strcasehash);
691
}
692
693
/* Create and return table which can hold SIZE strings whose character
694
   case is ignored.  */
695
st_table *
696
st_init_strcasetable_with_size(st_index_t size)
697
{
698
    return st_init_table_with_size(&type_strcasehash, size);
699
}
700
701
/* Make table TAB empty.  */
702
void
703
st_clear(st_table *tab)
704
{
705
    make_tab_empty(tab);
706
    tab->rebuilds_num++;
707
#ifdef ST_DEBUG
708
    st_check(tab);
709
#endif
710
}
711
#endif /* RUBY */
712
713
/* Free table TAB space.  */
714
void
715
st_free_table(st_table *tab)
716
534
{
717
534
    if (tab->bins != NULL)
718
0
        free(tab->bins);
719
534
    free(tab->entries);
720
534
    free(tab);
721
534
}
722
723
#ifdef RUBY
724
/* Return byte size of memory allocted for table TAB.  */
725
size_t
726
st_memsize(const st_table *tab)
727
{
728
    return(sizeof(st_table)
729
           + (tab->bins == NULL ? 0 : bins_size(tab))
730
           + get_allocated_entries(tab) * sizeof(st_table_entry));
731
}
732
#endif /* RUBY */
733
734
static st_index_t
735
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
736
737
static st_index_t
738
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
739
740
static st_index_t
741
find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
742
743
static st_index_t
744
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
745
             st_data_t key, st_index_t *bin_ind);
746
747
#ifdef HASH_LOG
748
static void
749
count_collision(const struct st_hash_type *type)
750
{
751
    collision.all++;
752
    if (type == &type_numhash) {
753
        collision.num++;
754
    }
755
    else if (type == &type_strhash) {
756
        collision.strcase++;
757
    }
758
    else if (type == &type_strcasehash) {
759
        collision.str++;
760
    }
761
}
762
763
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
764
#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
765
#define collision_check 0
766
#else
767
#define COLLISION
768
#define FOUND_BIN
769
#endif
770
771
/* If the number of entries in the table is at least REBUILD_THRESHOLD
772
   times less than the entry array length, decrease the table
773
   size.  */
774
0
#define REBUILD_THRESHOLD 4
775
776
#if REBUILD_THRESHOLD < 2
777
#error "REBUILD_THRESHOLD should be >= 2"
778
#endif
779
780
/* Rebuild table TAB.  Rebuilding removes all deleted bins and entries
781
   and can change size of the table entries and bins arrays.
782
   Rebuilding is implemented by creation of a new table or by
783
   compaction of the existing one.  */
784
static void
785
rebuild_table(st_table *tab)
786
0
{
787
0
    st_index_t i, ni, bound;
788
0
    unsigned int size_ind;
789
0
    st_table *new_tab;
790
0
    st_table_entry *entries, *new_entries;
791
0
    st_table_entry *curr_entry_ptr;
792
0
    st_index_t *bins;
793
0
    st_index_t bin_ind;
794
795
0
    st_assert(tab != NULL);
796
0
    bound = tab->entries_bound;
797
0
    entries = tab->entries;
798
0
    if ((2 * tab->num_entries <= get_allocated_entries(tab)
799
0
   && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
800
0
  || tab->num_entries < (1 << MINIMAL_POWER2)) {
801
        /* Compaction: */
802
0
        tab->num_entries = 0;
803
0
  if (tab->bins != NULL)
804
0
      initialize_bins(tab);
805
0
  new_tab = tab;
806
0
  new_entries = entries;
807
0
    }
808
0
    else {
809
0
        new_tab = st_init_table_with_size(tab->type,
810
0
            2 * tab->num_entries - 1);
811
0
  new_entries = new_tab->entries;
812
0
    }
813
0
    ni = 0;
814
0
    bins = new_tab->bins;
815
0
    size_ind = get_size_ind(new_tab);
816
0
    for (i = tab->entries_start; i < bound; i++) {
817
0
        curr_entry_ptr = &entries[i];
818
0
  PREFETCH(entries + i + 1, 0);
819
0
  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
820
0
      continue;
821
0
  if (&new_entries[ni] != curr_entry_ptr)
822
0
      new_entries[ni] = *curr_entry_ptr;
823
0
  if (EXPECT(bins != NULL, 1)) {
824
0
      bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
825
0
            curr_entry_ptr->key);
826
0
      st_assert(bin_ind != UNDEFINED_BIN_IND);
827
0
      st_assert(tab == new_tab || new_tab->rebuilds_num == 0);
828
0
      st_assert(IND_EMPTY_BIN_P(new_tab, bin_ind));
829
0
      set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
830
0
  }
831
0
  new_tab->num_entries++;
832
0
  ni++;
833
0
    }
834
0
    if (new_tab != tab) {
835
0
        tab->entry_power = new_tab->entry_power;
836
0
  tab->bin_power = new_tab->bin_power;
837
0
  tab->size_ind = new_tab->size_ind;
838
0
  st_assert(tab->num_entries == ni);
839
0
  st_assert(new_tab->num_entries == ni);
840
0
  if (tab->bins != NULL)
841
0
      free(tab->bins);
842
0
  tab->bins = new_tab->bins;
843
0
  free(tab->entries);
844
0
  tab->entries = new_tab->entries;
845
0
  free(new_tab);
846
0
    }
847
0
    tab->entries_start = 0;
848
0
    tab->entries_bound = tab->num_entries;
849
0
    tab->rebuilds_num++;
850
#ifdef ST_DEBUG
851
    st_check(tab);
852
#endif
853
0
}
854
855
/* Return the next secondary hash index for table TAB using previous
856
   index IND and PERTERB.  Finally modulo of the function becomes a
857
   full *cycle linear congruential generator*, in other words it
858
   guarantees traversing all table bins in extreme case.
859
860
   According the Hull-Dobell theorem a generator
861
   "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
862
     o m and c are relatively prime
863
     o a-1 is divisible by all prime factors of m
864
     o a-1 is divisible by 4 if m is divisible by 4.
865
866
   For our case a is 5, c is 1, and m is a power of two.  */
867
static inline st_index_t
868
secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
869
0
{
870
0
    *perterb >>= 11;
871
0
    ind = (ind << 2) + ind + *perterb + 1;
872
0
    return hash_bin(ind, tab);
873
0
}
874
875
/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
876
   search.  Return the index of the found entry in array `entries`.
877
   If it is not found, return UNDEFINED_ENTRY_IND.  If the table was
878
   rebuilt during the search, return REBUILT_TABLE_ENTRY_IND.  */
879
static inline st_index_t
880
find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
881
5.87k
{
882
5.87k
    int eq_p, rebuilt_p;
883
5.87k
    st_index_t i, bound;
884
5.87k
    st_table_entry *entries;
885
886
5.87k
    bound = tab->entries_bound;
887
5.87k
    entries = tab->entries;
888
12.2k
    for (i = tab->entries_start; i < bound; i++) {
889
8.54k
  DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
890
8.54k
  if (EXPECT(rebuilt_p, 0))
891
0
      return REBUILT_TABLE_ENTRY_IND;
892
8.54k
  if (eq_p)
893
2.13k
      return i;
894
8.54k
    }
895
3.73k
    return UNDEFINED_ENTRY_IND;
896
5.87k
}
897
898
/* Use the quadratic probing.  The method has a better data locality
899
   but more collisions than the current approach.  In average it
900
   results in a bit slower search.  */
901
/*#define QUADRATIC_PROBE*/
902
903
/* Return index of entry with HASH_VALUE and KEY in table TAB.  If
904
   there is no such entry, return UNDEFINED_ENTRY_IND.  If the table
905
   was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND.  */
906
static st_index_t
907
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
908
0
{
909
0
    int eq_p, rebuilt_p;
910
0
    st_index_t ind;
911
#ifdef QUADRATIC_PROBE
912
    st_index_t d;
913
#else
914
0
    st_index_t peterb;
915
0
#endif
916
0
    st_index_t bin;
917
0
    st_table_entry *entries = tab->entries;
918
919
0
    st_assert(tab != NULL);
920
0
    st_assert(tab->bins != NULL);
921
0
    ind = hash_bin(hash_value, tab);
922
#ifdef QUADRATIC_PROBE
923
    d = 1;
924
#else
925
0
    peterb = hash_value;
926
0
#endif
927
0
    FOUND_BIN;
928
0
    for (;;) {
929
0
        bin = get_bin(tab->bins, get_size_ind(tab), ind);
930
0
        if (! EMPTY_OR_DELETED_BIN_P(bin)) {
931
0
      DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
932
0
      if (EXPECT(rebuilt_p, 0))
933
0
    return REBUILT_TABLE_ENTRY_IND;
934
0
      if (eq_p)
935
0
    break;
936
0
  } else if (EMPTY_BIN_P(bin))
937
0
            return UNDEFINED_ENTRY_IND;
938
#ifdef QUADRATIC_PROBE
939
  ind = hash_bin(ind + d, tab);
940
  d++;
941
#else
942
0
        ind = secondary_hash(ind, tab, &peterb);
943
0
#endif
944
0
        COLLISION;
945
0
    }
946
0
    return bin;
947
0
}
948
949
/* Find and return index of table TAB bin corresponding to an entry
950
   with HASH_VALUE and KEY.  If there is no such bin, return
951
   UNDEFINED_BIN_IND.  If the table was rebuilt during the search,
952
   return REBUILT_TABLE_BIN_IND.  */
953
static st_index_t
954
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
955
0
{
956
0
    int eq_p, rebuilt_p;
957
0
    st_index_t ind;
958
#ifdef QUADRATIC_PROBE
959
    st_index_t d;
960
#else
961
0
    st_index_t peterb;
962
0
#endif
963
0
    st_index_t bin;
964
0
    st_table_entry *entries = tab->entries;
965
966
0
    st_assert(tab != NULL);
967
0
    st_assert(tab->bins != NULL);
968
0
    ind = hash_bin(hash_value, tab);
969
#ifdef QUADRATIC_PROBE
970
    d = 1;
971
#else
972
0
    peterb = hash_value;
973
0
#endif
974
0
    FOUND_BIN;
975
0
    for (;;) {
976
0
        bin = get_bin(tab->bins, get_size_ind(tab), ind);
977
0
        if (! EMPTY_OR_DELETED_BIN_P(bin)) {
978
0
      DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
979
0
      if (EXPECT(rebuilt_p, 0))
980
0
    return REBUILT_TABLE_BIN_IND;
981
0
      if (eq_p)
982
0
    break;
983
0
  } else if (EMPTY_BIN_P(bin))
984
0
            return UNDEFINED_BIN_IND;
985
#ifdef QUADRATIC_PROBE
986
  ind = hash_bin(ind + d, tab);
987
  d++;
988
#else
989
0
        ind = secondary_hash(ind, tab, &peterb);
990
0
#endif
991
0
        COLLISION;
992
0
    }
993
0
    return ind;
994
0
}
995
996
/* Find and return index of table TAB bin corresponding to an entry
997
   with HASH_VALUE and KEY.  The entry should be in the table
998
   already.  */
999
static st_index_t
1000
find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
1001
0
{
1002
0
    st_index_t ind;
1003
#ifdef QUADRATIC_PROBE
1004
    st_index_t d;
1005
#else
1006
0
    st_index_t peterb;
1007
0
#endif
1008
0
    st_index_t bin;
1009
0
    st_table_entry *entries = tab->entries;
1010
1011
0
    st_assert(tab != NULL);
1012
0
    st_assert(tab->bins != NULL);
1013
0
    ind = hash_bin(hash_value, tab);
1014
#ifdef QUADRATIC_PROBE
1015
    d = 1;
1016
#else
1017
0
    peterb = hash_value;
1018
0
#endif
1019
0
    FOUND_BIN;
1020
0
    for (;;) {
1021
0
        bin = get_bin(tab->bins, get_size_ind(tab), ind);
1022
0
        if (EMPTY_OR_DELETED_BIN_P(bin))
1023
0
      return ind;
1024
0
  st_assert (entries[bin - ENTRY_BASE].hash != hash_value);
1025
#ifdef QUADRATIC_PROBE
1026
  ind = hash_bin(ind + d, tab);
1027
  d++;
1028
#else
1029
0
        ind = secondary_hash(ind, tab, &peterb);
1030
0
#endif
1031
0
        COLLISION;
1032
0
    }
1033
0
}
1034
1035
/* Return index of table TAB bin for HASH_VALUE and KEY through
1036
   BIN_IND and the pointed value as the function result.  Reserve the
1037
   bin for inclusion of the corresponding entry into the table if it
1038
   is not there yet.  We always find such bin as bins array length is
1039
   bigger entries array.  Although we can reuse a deleted bin, the
1040
   result bin value is always empty if the table has no entry with
1041
   KEY.  Return the entries array index of the found entry or
1042
   UNDEFINED_ENTRY_IND if it is not found.  If the table was rebuilt
1043
   during the search, return REBUILT_TABLE_ENTRY_IND.  */
1044
static st_index_t
1045
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1046
             st_data_t key, st_index_t *bin_ind)
1047
0
{
1048
0
    int eq_p, rebuilt_p;
1049
0
    st_index_t ind;
1050
0
    st_hash_t curr_hash_value = *hash_value;
1051
#ifdef QUADRATIC_PROBE
1052
    st_index_t d;
1053
#else
1054
0
    st_index_t peterb;
1055
0
#endif
1056
0
    st_index_t entry_index;
1057
0
    st_index_t first_deleted_bin_ind;
1058
0
    st_table_entry *entries;
1059
1060
0
    st_assert(tab != NULL);
1061
0
    st_assert(tab->bins != NULL);
1062
0
    st_assert(tab->entries_bound <= get_allocated_entries(tab));
1063
0
    st_assert(tab->entries_start <= tab->entries_bound);
1064
0
    ind = hash_bin(curr_hash_value, tab);
1065
#ifdef QUADRATIC_PROBE
1066
    d = 1;
1067
#else
1068
0
    peterb = curr_hash_value;
1069
0
#endif
1070
0
    FOUND_BIN;
1071
0
    first_deleted_bin_ind = UNDEFINED_BIN_IND;
1072
0
    entries = tab->entries;
1073
0
    for (;;) {
1074
0
        entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1075
0
        if (EMPTY_BIN_P(entry_index)) {
1076
0
            tab->num_entries++;
1077
0
      entry_index = UNDEFINED_ENTRY_IND;
1078
0
            if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1079
                /* We can reuse bin of a deleted entry.  */
1080
0
                ind = first_deleted_bin_ind;
1081
0
                MARK_BIN_EMPTY(tab, ind);
1082
0
            }
1083
0
            break;
1084
0
  }
1085
0
  else if (! DELETED_BIN_P(entry_index)) {
1086
0
      DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1087
0
      if (EXPECT(rebuilt_p, 0))
1088
0
    return REBUILT_TABLE_ENTRY_IND;
1089
0
            if (eq_p)
1090
0
                break;
1091
0
  }
1092
0
  else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1093
0
            first_deleted_bin_ind = ind;
1094
#ifdef QUADRATIC_PROBE
1095
  ind = hash_bin(ind + d, tab);
1096
  d++;
1097
#else
1098
0
        ind = secondary_hash(ind, tab, &peterb);
1099
0
#endif
1100
0
        COLLISION;
1101
0
    }
1102
0
    *bin_ind = ind;
1103
0
    return entry_index;
1104
0
}
1105
1106
/* Find an entry with KEY in table TAB.  Return non-zero if we found
1107
   it.  Set up *RECORD to the found entry record.  */
1108
int
1109
st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1110
1.60k
{
1111
1.60k
    st_index_t bin;
1112
1.60k
    st_hash_t hash = do_hash(key, tab);
1113
1114
1.60k
 retry:
1115
1.60k
    if (tab->bins == NULL) {
1116
1.60k
        bin = find_entry(tab, hash, key);
1117
1.60k
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1118
0
      goto retry;
1119
1.60k
  if (bin == UNDEFINED_ENTRY_IND)
1120
1.60k
      return 0;
1121
1.60k
    }
1122
0
    else {
1123
0
        bin = find_table_entry_ind(tab, hash, key);
1124
0
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1125
0
      goto retry;
1126
0
  if (bin == UNDEFINED_ENTRY_IND)
1127
0
      return 0;
1128
0
  bin -= ENTRY_BASE;
1129
0
    }
1130
0
    if (value != 0)
1131
0
        *value = tab->entries[bin].record;
1132
0
    return 1;
1133
1.60k
}
1134
1135
#ifdef RUBY
1136
/* Find an entry with KEY in table TAB.  Return non-zero if we found
1137
   it.  Set up *RESULT to the found table entry key.  */
1138
int
1139
st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1140
{
1141
    st_index_t bin;
1142
    st_hash_t hash = do_hash(key, tab);
1143
1144
 retry:
1145
    if (tab->bins == NULL) {
1146
        bin = find_entry(tab, hash, key);
1147
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1148
      goto retry;
1149
  if (bin == UNDEFINED_ENTRY_IND)
1150
      return 0;
1151
    }
1152
    else {
1153
        bin = find_table_entry_ind(tab, hash, key);
1154
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1155
      goto retry;
1156
  if (bin == UNDEFINED_ENTRY_IND)
1157
      return 0;
1158
  bin -= ENTRY_BASE;
1159
    }
1160
    if (result != 0)
1161
        *result = tab->entries[bin].key;
1162
    return 1;
1163
}
1164
#endif /* RUBY */
1165
1166
/* Check the table and rebuild it if it is necessary.  */
1167
static inline void
1168
rebuild_table_if_necessary(st_table *tab)
1169
2.13k
{
1170
2.13k
    st_index_t bound = tab->entries_bound;
1171
1172
2.13k
    if (bound == get_allocated_entries(tab))
1173
0
        rebuild_table(tab);
1174
2.13k
    st_assert(tab->entries_bound < get_allocated_entries(tab));
1175
2.13k
}
1176
1177
/* Insert (KEY, VALUE) into table TAB and return zero.  If there is
1178
   already entry with KEY in the table, return nonzero and and update
1179
   the value of the found entry.  */
1180
int
1181
st_insert(st_table *tab, st_data_t key, st_data_t value)
1182
2.13k
{
1183
2.13k
    st_table_entry *entry;
1184
2.13k
    st_index_t bin;
1185
2.13k
    st_index_t ind;
1186
2.13k
    st_hash_t hash_value;
1187
2.13k
    st_index_t bin_ind;
1188
2.13k
    int new_p;
1189
1190
2.13k
    hash_value = do_hash(key, tab);
1191
2.13k
 retry:
1192
2.13k
    rebuild_table_if_necessary(tab);
1193
2.13k
    if (tab->bins == NULL) {
1194
2.13k
        bin = find_entry(tab, hash_value, key);
1195
2.13k
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1196
0
      goto retry;
1197
2.13k
  new_p = bin == UNDEFINED_ENTRY_IND;
1198
2.13k
  if (new_p)
1199
2.13k
      tab->num_entries++;
1200
2.13k
  bin_ind = UNDEFINED_BIN_IND;
1201
2.13k
    }
1202
0
    else {
1203
0
        bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1204
0
               key, &bin_ind);
1205
0
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1206
0
      goto retry;
1207
0
  new_p = bin == UNDEFINED_ENTRY_IND;
1208
0
  bin -= ENTRY_BASE;
1209
0
    }
1210
2.13k
    if (new_p) {
1211
2.13k
        st_assert(tab->entries_bound < get_allocated_entries(tab));
1212
2.13k
  ind = tab->entries_bound++;
1213
2.13k
        entry = &tab->entries[ind];
1214
2.13k
        entry->hash = hash_value;
1215
2.13k
        entry->key = key;
1216
2.13k
        entry->record = value;
1217
2.13k
  if (bin_ind != UNDEFINED_BIN_IND)
1218
0
      set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1219
#ifdef ST_DEBUG
1220
  st_check(tab);
1221
#endif
1222
2.13k
        return 0;
1223
2.13k
    }
1224
0
    tab->entries[bin].record = value;
1225
#ifdef ST_DEBUG
1226
    st_check(tab);
1227
#endif
1228
0
    return 1;
1229
2.13k
}
1230
1231
#ifdef RUBY
1232
/* Insert (KEY, VALUE, HASH) into table TAB.  The table should not have
1233
   entry with KEY before the insertion.  */
1234
void
1235
st_add_direct_with_hash(st_table *tab,
1236
      st_data_t key, st_data_t value, st_hash_t hash)
1237
{
1238
    st_table_entry *entry;
1239
    st_index_t ind;
1240
    st_index_t bin_ind;
1241
1242
    rebuild_table_if_necessary(tab);
1243
    ind = tab->entries_bound++;
1244
    entry = &tab->entries[ind];
1245
    entry->hash = hash;
1246
    entry->key = key;
1247
    entry->record = value;
1248
    tab->num_entries++;
1249
    if (tab->bins != NULL) {
1250
        bin_ind = find_table_bin_ind_direct(tab, hash, key);
1251
  st_assert (bin_ind != UNDEFINED_BIN_IND);
1252
  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1253
    }
1254
#ifdef ST_DEBUG
1255
    st_check(tab);
1256
#endif
1257
}
1258
1259
/* Insert (KEY, VALUE) into table TAB.  The table should not have
1260
   entry with KEY before the insertion.  */
1261
void
1262
st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1263
{
1264
    st_hash_t hash_value;
1265
1266
    hash_value = do_hash(key, tab);
1267
    st_add_direct_with_hash(tab, key, value, hash_value);
1268
}
1269
1270
/* Insert (FUNC(KEY), VALUE) into table TAB and return zero.  If
1271
   there is already entry with KEY in the table, return nonzero and
1272
   and update the value of the found entry.  */
1273
int
1274
st_insert2(st_table *tab, st_data_t key, st_data_t value,
1275
           st_data_t (*func)(st_data_t))
1276
{
1277
    st_table_entry *entry;
1278
    st_index_t bin;
1279
    st_index_t ind, check;
1280
    st_hash_t hash_value;
1281
    st_index_t bin_ind;
1282
    int new_p;
1283
1284
    hash_value = do_hash(key, tab);
1285
 retry:
1286
    rebuild_table_if_necessary (tab);
1287
    if (tab->bins == NULL) {
1288
        bin = find_entry(tab, hash_value, key);
1289
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1290
      goto retry;
1291
  new_p = bin == UNDEFINED_ENTRY_IND;
1292
  if (new_p)
1293
      tab->num_entries++;
1294
  bin_ind = UNDEFINED_BIN_IND;
1295
    }
1296
    else {
1297
        bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1298
               key, &bin_ind);
1299
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1300
      goto retry;
1301
  new_p = bin == UNDEFINED_ENTRY_IND;
1302
  bin -= ENTRY_BASE;
1303
    }
1304
    if (new_p) {
1305
        st_assert(tab->entries_bound < get_allocated_entries(tab));
1306
        check = tab->rebuilds_num;
1307
        key = (*func)(key);
1308
        st_assert(check == tab->rebuilds_num);
1309
        ind = tab->entries_bound++;
1310
        entry = &tab->entries[ind];
1311
        entry->hash = hash_value;
1312
        entry->key = key;
1313
        entry->record = value;
1314
  if (bin_ind != UNDEFINED_BIN_IND)
1315
      set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1316
  st_assert(do_hash(key, tab) == hash_value);
1317
#ifdef ST_DEBUG
1318
  st_check(tab);
1319
#endif
1320
        return 0;
1321
    }
1322
    tab->entries[bin].record = value;
1323
#ifdef ST_DEBUG
1324
    st_check(tab);
1325
#endif
1326
    return 1;
1327
}
1328
1329
/* Create and return a copy of table OLD_TAB.  */
1330
st_table *
1331
st_copy(st_table *old_tab)
1332
{
1333
    st_table *new_tab;
1334
1335
    new_tab = (st_table *) malloc(sizeof(st_table));
1336
#ifndef RUBY
1337
    if (new_tab == NULL)
1338
        return NULL;
1339
#endif
1340
    *new_tab = *old_tab;
1341
    if (old_tab->bins == NULL)
1342
        new_tab->bins = NULL;
1343
    else {
1344
        new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1345
#ifndef RUBY
1346
        if (new_tab->bins == NULL) {
1347
            free(new_tab);
1348
            return NULL;
1349
        }
1350
#endif
1351
    }
1352
    new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1353
             * sizeof(st_table_entry));
1354
#ifndef RUBY
1355
    if (new_tab->entries == NULL) {
1356
        st_free_table(new_tab);
1357
        return NULL;
1358
    }
1359
#endif
1360
    MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1361
     get_allocated_entries(old_tab));
1362
    if (old_tab->bins != NULL)
1363
        MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1364
#ifdef ST_DEBUG
1365
    st_check(new_tab);
1366
#endif
1367
    return new_tab;
1368
}
1369
#endif /* RUBY */
1370
1371
/* Update the entries start of table TAB after removing an entry
1372
   with index N in the array entries.  */
1373
static inline void
1374
update_range_for_deleted(st_table *tab, st_index_t n)
1375
2.13k
{
1376
    /* Do not update entries_bound here.  Otherwise, we can fill all
1377
       bins by deleted entry value before rebuilding the table.  */
1378
2.13k
    if (tab->entries_start == n)
1379
2.13k
        tab->entries_start = n + 1;
1380
2.13k
}
1381
1382
#ifdef RUBY
1383
/* Delete entry with KEY from table TAB, set up *VALUE (unless
1384
   VALUE is zero) from deleted table entry, and return non-zero.  If
1385
   there is no entry with KEY in the table, clear *VALUE (unless VALUE
1386
   is zero), and return zero.  */
1387
static int
1388
st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1389
{
1390
    st_table_entry *entry;
1391
    st_index_t bin;
1392
    st_index_t bin_ind;
1393
    st_hash_t hash;
1394
1395
    st_assert(tab != NULL);
1396
    hash = do_hash(*key, tab);
1397
 retry:
1398
    if (tab->bins == NULL) {
1399
        bin = find_entry(tab, hash, *key);
1400
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1401
      goto retry;
1402
  if (bin == UNDEFINED_ENTRY_IND) {
1403
      if (value != 0) *value = 0;
1404
      return 0;
1405
  }
1406
    }
1407
    else {
1408
        bin_ind = find_table_bin_ind(tab, hash, *key);
1409
  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1410
      goto retry;
1411
  if (bin_ind == UNDEFINED_BIN_IND) {
1412
      if (value != 0) *value = 0;
1413
      return 0;
1414
  }
1415
  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1416
  MARK_BIN_DELETED(tab, bin_ind);
1417
    }
1418
    entry = &tab->entries[bin];
1419
    *key = entry->key;
1420
    if (value != 0) *value = entry->record;
1421
    MARK_ENTRY_DELETED(entry);
1422
    tab->num_entries--;
1423
    update_range_for_deleted(tab, bin);
1424
#ifdef ST_DEBUG
1425
    st_check(tab);
1426
#endif
1427
    return 1;
1428
}
1429
1430
int
1431
st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1432
{
1433
    return st_general_delete(tab, key, value);
1434
}
1435
1436
/* The function and other functions with suffix '_safe' or '_check'
1437
   are originated from the previous implementation of the hash tables.
1438
   It was necessary for correct deleting entries during traversing
1439
   tables.  The current implementation permits deletion during
1440
   traversing without a specific way to do this.  */
1441
int
1442
st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1443
               st_data_t never ATTRIBUTE_UNUSED)
1444
{
1445
    return st_general_delete(tab, key, value);
1446
}
1447
1448
/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1449
   return zero.  Otherwise, remove the first entry in the table.
1450
   Return its key through KEY and its record through VALUE (unless
1451
   VALUE is zero).  */
1452
int
1453
st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1454
{
1455
    st_index_t i, bound;
1456
    st_index_t bin;
1457
    st_table_entry *entries, *curr_entry_ptr;
1458
    st_index_t bin_ind;
1459
1460
    entries = tab->entries;
1461
    bound = tab->entries_bound;
1462
    for (i = tab->entries_start; i < bound; i++) {
1463
        curr_entry_ptr = &entries[i];
1464
  if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1465
      st_hash_t entry_hash = curr_entry_ptr->hash;
1466
      st_data_t entry_key = curr_entry_ptr->key;
1467
1468
      if (value != 0) *value = curr_entry_ptr->record;
1469
      *key = entry_key;
1470
  retry:
1471
      if (tab->bins == NULL) {
1472
          bin = find_entry(tab, entry_hash, entry_key);
1473
    if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1474
        entries = tab->entries;
1475
        goto retry;
1476
    }
1477
    st_assert(bin != UNDEFINED_ENTRY_IND);
1478
    curr_entry_ptr = &entries[bin];
1479
      }
1480
      else {
1481
          bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1482
    if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1483
        entries = tab->entries;
1484
        goto retry;
1485
    }
1486
    st_assert(bin_ind != UNDEFINED_BIN_IND);
1487
    curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1488
            - ENTRY_BASE];
1489
    MARK_BIN_DELETED(tab, bin_ind);
1490
      }
1491
      st_assert(entry_hash != curr_entry_ptr->hash && entry_key == curr_entry_ptr->key);
1492
      MARK_ENTRY_DELETED(curr_entry_ptr);
1493
      tab->num_entries--;
1494
      update_range_for_deleted(tab, i);
1495
#ifdef ST_DEBUG
1496
      st_check(tab);
1497
#endif
1498
      return 1;
1499
  }
1500
    }
1501
    st_assert(tab->num_entries == 0);
1502
    tab->entries_start = tab->entries_bound = 0;
1503
    if (value != 0) *value = 0;
1504
    return 0;
1505
}
1506
1507
/* See comments for function st_delete_safe.  */
1508
void
1509
st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1510
                st_data_t never ATTRIBUTE_UNUSED)
1511
{
1512
}
1513
1514
/* Find entry with KEY in table TAB, call FUNC with the key and the
1515
   value of the found entry, and non-zero as the 3rd argument.  If the
1516
   entry is not found, call FUNC with KEY, and 2 zero arguments.  If
1517
   the call returns ST_CONTINUE, the table will have an entry with key
1518
   and value returned by FUNC through the 1st and 2nd parameters.  If
1519
   the call of FUNC returns ST_DELETE, the table will not have entry
1520
   with KEY.  The function returns flag of that the entry with KEY was
1521
   in the table before the call.  */
1522
int
1523
st_update(st_table *tab, st_data_t key,
1524
    st_update_callback_func *func, st_data_t arg)
1525
{
1526
    st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1527
    st_index_t bin = 0; /* Ditto */
1528
    st_table_entry *entries;
1529
    st_index_t bin_ind;
1530
    st_data_t value = 0, old_key;
1531
    st_index_t check;
1532
    int retval, existing;
1533
    st_hash_t hash = do_hash(key, tab);
1534
1535
 retry:
1536
    entries = tab->entries;
1537
    if (tab->bins == NULL) {
1538
        bin = find_entry(tab, hash, key);
1539
  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1540
      goto retry;
1541
  existing = bin != UNDEFINED_ENTRY_IND;
1542
  entry = &entries[bin];
1543
  bin_ind = UNDEFINED_BIN_IND;
1544
    }
1545
    else {
1546
        bin_ind = find_table_bin_ind(tab, hash, key);
1547
  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1548
      goto retry;
1549
  existing = bin_ind != UNDEFINED_BIN_IND;
1550
  if (existing) {
1551
      bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1552
      entry = &entries[bin];
1553
  }
1554
    }
1555
    if (existing) {
1556
        key = entry->key;
1557
        value = entry->record;
1558
    }
1559
    old_key = key;
1560
    check = tab->rebuilds_num;
1561
    retval = (*func)(&key, &value, arg, existing);
1562
    st_assert(check == tab->rebuilds_num);
1563
    switch (retval) {
1564
      case ST_CONTINUE:
1565
        if (! existing) {
1566
      st_add_direct_with_hash(tab, key, value, hash);
1567
            break;
1568
        }
1569
        if (old_key != key) {
1570
            entry->key = key;
1571
        }
1572
        entry->record = value;
1573
        break;
1574
      case ST_DELETE:
1575
        if (existing) {
1576
      if (bin_ind != UNDEFINED_BIN_IND)
1577
          MARK_BIN_DELETED(tab, bin_ind);
1578
            MARK_ENTRY_DELETED(entry);
1579
      tab->num_entries--;
1580
      update_range_for_deleted(tab, bin);
1581
#ifdef ST_DEBUG
1582
      st_check(tab);
1583
#endif
1584
        }
1585
        break;
1586
    }
1587
#ifdef ST_DEBUG
1588
    st_check(tab);
1589
#endif
1590
    return existing;
1591
}
1592
#endif /* RUBY */
1593
1594
/* Traverse all entries in table TAB calling FUNC with current entry
1595
   key and value and zero.  If the call returns ST_STOP, stop
1596
   traversing.  If the call returns ST_DELETE, delete the current
1597
   entry from the table.  In case of ST_CHECK or ST_CONTINUE, continue
1598
   traversing.  The function returns zero unless an error is found.
1599
   CHECK_P is flag of st_foreach_check call.  The behavior is a bit
1600
   different for ST_CHECK and when the current element is removed
1601
   during traversing.  */
1602
static inline int
1603
st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_update_callback_func *replace, st_data_t arg,
1604
       int check_p)
1605
534
{
1606
534
    st_index_t bin;
1607
534
    st_index_t bin_ind;
1608
534
    st_table_entry *entries, *curr_entry_ptr;
1609
534
    enum st_retval retval;
1610
534
    st_index_t i, rebuilds_num;
1611
534
    st_hash_t hash;
1612
534
    st_data_t key;
1613
534
    int error_p, packed_p = tab->bins == NULL;
1614
1615
534
    st_assert(tab->entries_start <= tab->entries_bound);
1616
534
    entries = tab->entries;
1617
    /* The bound can change inside the loop even without rebuilding
1618
       the table, e.g. by an entry inesrtion.  */
1619
2.67k
    for (i = tab->entries_start; i < tab->entries_bound; i++) {
1620
2.13k
        curr_entry_ptr = &entries[i];
1621
2.13k
  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1622
0
      continue;
1623
2.13k
  key = curr_entry_ptr->key;
1624
2.13k
  rebuilds_num = tab->rebuilds_num;
1625
2.13k
  hash = curr_entry_ptr->hash;
1626
2.13k
  retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1627
1628
2.13k
        if (retval == ST_REPLACE && replace) {
1629
0
            st_data_t value;
1630
0
            value = curr_entry_ptr->record;
1631
0
            retval = (*replace)(&key, &value, arg, TRUE);
1632
0
            curr_entry_ptr->key = key;
1633
0
            curr_entry_ptr->record = value;
1634
0
        }
1635
1636
2.13k
  if (rebuilds_num != tab->rebuilds_num) {
1637
0
  retry:
1638
0
      entries = tab->entries;
1639
0
      packed_p = tab->bins == NULL;
1640
0
      if (packed_p) {
1641
0
          i = find_entry(tab, hash, key);
1642
0
    if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1643
0
        goto retry;
1644
0
    error_p = i == UNDEFINED_ENTRY_IND;
1645
0
      }
1646
0
      else {
1647
0
          i = find_table_entry_ind(tab, hash, key);
1648
0
    if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1649
0
        goto retry;
1650
0
    error_p = i == UNDEFINED_ENTRY_IND;
1651
0
    i -= ENTRY_BASE;
1652
0
      }
1653
0
      if (error_p && check_p) {
1654
          /* call func with error notice */
1655
0
          retval = (*func)(0, 0, arg, 1);
1656
#ifdef ST_DEBUG
1657
    st_check(tab);
1658
#endif
1659
0
    return 1;
1660
0
      }
1661
0
      curr_entry_ptr = &entries[i];
1662
0
  }
1663
2.13k
  switch (retval) {
1664
0
          case ST_REPLACE:
1665
0
            break;
1666
0
    case ST_CONTINUE:
1667
0
      break;
1668
0
    case ST_CHECK:
1669
0
      if (check_p)
1670
0
    break;
1671
0
    case ST_STOP:
1672
#ifdef ST_DEBUG
1673
      st_check(tab);
1674
#endif
1675
0
      return 0;
1676
2.13k
    case ST_DELETE: {
1677
2.13k
      st_data_t key = curr_entry_ptr->key;
1678
1679
2.13k
      again:
1680
2.13k
      if (packed_p) {
1681
2.13k
    bin = find_entry(tab, hash, key);
1682
2.13k
    if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1683
0
        goto again;
1684
2.13k
    if (bin == UNDEFINED_ENTRY_IND)
1685
0
        break;
1686
2.13k
      }
1687
0
      else {
1688
0
    bin_ind = find_table_bin_ind(tab, hash, key);
1689
0
    if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1690
0
        goto again;
1691
0
    if (bin_ind == UNDEFINED_BIN_IND)
1692
0
        break;
1693
0
    bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1694
0
    MARK_BIN_DELETED(tab, bin_ind);
1695
0
      }
1696
2.13k
      curr_entry_ptr = &entries[bin];
1697
2.13k
      MARK_ENTRY_DELETED(curr_entry_ptr);
1698
2.13k
      tab->num_entries--;
1699
2.13k
      update_range_for_deleted(tab, bin);
1700
#ifdef ST_DEBUG
1701
      st_check(tab);
1702
#endif
1703
2.13k
      break;
1704
2.13k
    }
1705
2.13k
  }
1706
2.13k
    }
1707
#ifdef ST_DEBUG
1708
    st_check(tab);
1709
#endif
1710
534
    return 0;
1711
534
}
1712
1713
#ifdef RUBY
1714
int
1715
st_foreach_with_replace(st_table *tab, int (*func)(ANYARGS), st_update_callback_func *replace, st_data_t arg)
1716
{
1717
    return st_general_foreach(tab, func, replace, arg, TRUE);
1718
}
1719
#endif /* RUBY */
1720
1721
int
1722
st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg)
1723
534
{
1724
534
    return st_general_foreach(tab, func, NULL, arg, FALSE);
1725
534
}
1726
1727
#ifdef RUBY
1728
/* See comments for function st_delete_safe.  */
1729
int
1730
st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
1731
                 st_data_t never ATTRIBUTE_UNUSED)
1732
{
1733
    return st_general_foreach(tab, func, NULL, arg, TRUE);
1734
}
1735
1736
/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1737
   Return the number of keys set up in array KEYS.  */
1738
static inline st_index_t
1739
st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1740
{
1741
    st_index_t i, bound;
1742
    st_data_t key, *keys_start, *keys_end;
1743
    st_table_entry *curr_entry_ptr, *entries = tab->entries;
1744
1745
    bound = tab->entries_bound;
1746
    keys_start = keys;
1747
    keys_end = keys + size;
1748
    for (i = tab->entries_start; i < bound; i++) {
1749
  if (keys == keys_end)
1750
      break;
1751
  curr_entry_ptr = &entries[i];
1752
  key = curr_entry_ptr->key;
1753
        if (! DELETED_ENTRY_P(curr_entry_ptr))
1754
      *keys++ = key;
1755
    }
1756
1757
    return keys - keys_start;
1758
}
1759
1760
st_index_t
1761
st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1762
{
1763
    return st_general_keys(tab, keys, size);
1764
}
1765
1766
/* See comments for function st_delete_safe.  */
1767
st_index_t
1768
st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1769
              st_data_t never ATTRIBUTE_UNUSED)
1770
{
1771
    return st_general_keys(tab, keys, size);
1772
}
1773
1774
/* Set up array VALUES by at most SIZE values of head table TAB
1775
   entries.  Return the number of values set up in array VALUES.  */
1776
static inline st_index_t
1777
st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1778
{
1779
    st_index_t i, bound;
1780
    st_data_t *values_start, *values_end;
1781
    st_table_entry *curr_entry_ptr, *entries = tab->entries;
1782
1783
    values_start = values;
1784
    values_end = values + size;
1785
    bound = tab->entries_bound;
1786
    st_assert(bound != 0);
1787
    for (i = tab->entries_start; i < bound; i++) {
1788
  if (values == values_end)
1789
      break;
1790
        curr_entry_ptr = &entries[i];
1791
        if (! DELETED_ENTRY_P(curr_entry_ptr))
1792
      *values++ = curr_entry_ptr->record;
1793
    }
1794
1795
    return values - values_start;
1796
}
1797
1798
st_index_t
1799
st_values(st_table *tab, st_data_t *values, st_index_t size)
1800
{
1801
    return st_general_values(tab, values, size);
1802
}
1803
1804
/* See comments for function st_delete_safe.  */
1805
st_index_t
1806
st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1807
    st_data_t never ATTRIBUTE_UNUSED)
1808
{
1809
    return st_general_values(tab, values, size);
1810
}
1811
1812
#define FNV1_32A_INIT 0x811c9dc5
1813
1814
/*
1815
 * 32 bit magic FNV-1a prime
1816
 */
1817
#define FNV_32_PRIME 0x01000193
1818
1819
#ifndef UNALIGNED_WORD_ACCESS
1820
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1821
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1822
     defined(__powerpc64__) || \
1823
     defined(__mc68020__)
1824
#   define UNALIGNED_WORD_ACCESS 1
1825
# endif
1826
#endif
1827
#ifndef UNALIGNED_WORD_ACCESS
1828
# define UNALIGNED_WORD_ACCESS 0
1829
#endif
1830
1831
/* This hash function is quite simplified MurmurHash3
1832
 * Simplification is legal, cause most of magic still happens in finalizator.
1833
 * And finalizator is almost the same as in MurmurHash3 */
1834
#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1835
#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1836
1837
#if ST_INDEX_BITS <= 32
1838
#define C1 (st_index_t)0xcc9e2d51
1839
#define C2 (st_index_t)0x1b873593
1840
#else
1841
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1842
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1843
#endif
1844
NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1845
NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1846
NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1847
1848
static inline st_index_t
1849
murmur_step(st_index_t h, st_index_t k)
1850
{
1851
#if ST_INDEX_BITS <= 32
1852
#define r1 (17)
1853
#define r2 (11)
1854
#else
1855
#define r1 (33)
1856
#define r2 (24)
1857
#endif
1858
    k *= C1;
1859
    h ^= ROTL(k, r1);
1860
    h *= C2;
1861
    h = ROTL(h, r2);
1862
    return h;
1863
}
1864
#undef r1
1865
#undef r2
1866
1867
static inline st_index_t
1868
murmur_finish(st_index_t h)
1869
{
1870
#if ST_INDEX_BITS <= 32
1871
#define r1 (16)
1872
#define r2 (13)
1873
#define r3 (16)
1874
    const st_index_t c1 = 0x85ebca6b;
1875
    const st_index_t c2 = 0xc2b2ae35;
1876
#else
1877
/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1878
#define r1 (30)
1879
#define r2 (27)
1880
#define r3 (31)
1881
    const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1882
    const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1883
#endif
1884
#if ST_INDEX_BITS > 64
1885
    h ^= h >> 64;
1886
    h *= c2;
1887
    h ^= h >> 65;
1888
#endif
1889
    h ^= h >> r1;
1890
    h *= c1;
1891
    h ^= h >> r2;
1892
    h *= c2;
1893
    h ^= h >> r3;
1894
    return h;
1895
}
1896
#undef r1
1897
#undef r2
1898
#undef r3
1899
1900
st_index_t
1901
st_hash(const void *ptr, size_t len, st_index_t h)
1902
{
1903
    const char *data = ptr;
1904
    st_index_t t = 0;
1905
    size_t l = len;
1906
1907
#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1908
#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1909
#if SIZEOF_ST_INDEX_T > 4
1910
#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1911
#if SIZEOF_ST_INDEX_T > 8
1912
#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1913
    UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1914
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1915
#endif
1916
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1917
#else
1918
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1919
#endif
1920
#undef SKIP_TAIL
1921
    if (len >= sizeof(st_index_t)) {
1922
#if !UNALIGNED_WORD_ACCESS
1923
  int align = (int)((st_data_t)data % sizeof(st_index_t));
1924
  if (align) {
1925
      st_index_t d = 0;
1926
      int sl, sr, pack;
1927
1928
      switch (align) {
1929
#ifdef WORDS_BIGENDIAN
1930
# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1931
    t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1932
#else
1933
# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1934
    t |= data_at(n) << CHAR_BIT*(n)
1935
#endif
1936
    UNALIGNED_ADD_ALL;
1937
#undef UNALIGNED_ADD
1938
      }
1939
1940
#ifdef WORDS_BIGENDIAN
1941
      t >>= (CHAR_BIT * align) - CHAR_BIT;
1942
#else
1943
      t <<= (CHAR_BIT * align);
1944
#endif
1945
1946
      data += sizeof(st_index_t)-align;
1947
      len -= sizeof(st_index_t)-align;
1948
1949
      sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1950
      sr = CHAR_BIT * align;
1951
1952
      while (len >= sizeof(st_index_t)) {
1953
    d = *(st_index_t *)data;
1954
#ifdef WORDS_BIGENDIAN
1955
    t = (t << sr) | (d >> sl);
1956
#else
1957
    t = (t >> sr) | (d << sl);
1958
#endif
1959
    h = murmur_step(h, t);
1960
    t = d;
1961
    data += sizeof(st_index_t);
1962
    len -= sizeof(st_index_t);
1963
      }
1964
1965
      pack = len < (size_t)align ? (int)len : align;
1966
      d = 0;
1967
      switch (pack) {
1968
#ifdef WORDS_BIGENDIAN
1969
# define UNALIGNED_ADD(n) case (n) + 1: \
1970
    d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1971
#else
1972
# define UNALIGNED_ADD(n) case (n) + 1: \
1973
    d |= data_at(n) << CHAR_BIT*(n)
1974
#endif
1975
    UNALIGNED_ADD_ALL;
1976
#undef UNALIGNED_ADD
1977
      }
1978
#ifdef WORDS_BIGENDIAN
1979
      t = (t << sr) | (d >> sl);
1980
#else
1981
      t = (t >> sr) | (d << sl);
1982
#endif
1983
1984
      if (len < (size_t)align) goto skip_tail;
1985
# define SKIP_TAIL 1
1986
      h = murmur_step(h, t);
1987
      data += pack;
1988
      len -= pack;
1989
  }
1990
  else
1991
#endif
1992
#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1993
#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1994
#else
1995
#define aligned_data data
1996
#endif
1997
  {
1998
      do {
1999
    h = murmur_step(h, *(st_index_t *)aligned_data);
2000
    data += sizeof(st_index_t);
2001
    len -= sizeof(st_index_t);
2002
      } while (len >= sizeof(st_index_t));
2003
  }
2004
    }
2005
2006
    t = 0;
2007
    switch (len) {
2008
#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
2009
    /* in this case byteorder doesn't really matter */
2010
#if SIZEOF_ST_INDEX_T > 4
2011
      case 7: t |= data_at(6) << 48;
2012
      case 6: t |= data_at(5) << 40;
2013
      case 5: t |= data_at(4) << 32;
2014
      case 4:
2015
  t |= (st_index_t)*(uint32_t*)aligned_data;
2016
  goto skip_tail;
2017
# define SKIP_TAIL 1
2018
#endif
2019
      case 3: t |= data_at(2) << 16;
2020
      case 2: t |= data_at(1) << 8;
2021
      case 1: t |= data_at(0);
2022
#else
2023
#ifdef WORDS_BIGENDIAN
2024
# define UNALIGNED_ADD(n) case (n) + 1: \
2025
  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2026
#else
2027
# define UNALIGNED_ADD(n) case (n) + 1: \
2028
  t |= data_at(n) << CHAR_BIT*(n)
2029
#endif
2030
  UNALIGNED_ADD_ALL;
2031
#undef UNALIGNED_ADD
2032
#endif
2033
#ifdef SKIP_TAIL
2034
      skip_tail:
2035
#endif
2036
  h ^= t; h -= ROTL(t, 7);
2037
  h *= C2;
2038
    }
2039
    h ^= l;
2040
#undef aligned_data
2041
2042
    return murmur_finish(h);
2043
}
2044
2045
st_index_t
2046
st_hash_uint32(st_index_t h, uint32_t i)
2047
{
2048
    return murmur_step(h, i);
2049
}
2050
2051
NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
2052
st_index_t
2053
st_hash_uint(st_index_t h, st_index_t i)
2054
{
2055
    i += h;
2056
/* no matter if it is BigEndian or LittleEndian,
2057
 * we hash just integers */
2058
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2059
    h = murmur_step(h, i >> 8*8);
2060
#endif
2061
    h = murmur_step(h, i);
2062
    return h;
2063
}
2064
2065
st_index_t
2066
st_hash_end(st_index_t h)
2067
{
2068
    h = murmur_finish(h);
2069
    return h;
2070
}
2071
2072
#undef st_hash_start
2073
st_index_t
2074
st_hash_start(st_index_t h)
2075
{
2076
    return h;
2077
}
2078
2079
static st_index_t
2080
strhash(st_data_t arg)
2081
{
2082
    register const char *string = (const char *)arg;
2083
    return st_hash(string, strlen(string), FNV1_32A_INIT);
2084
}
2085
2086
int
2087
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2088
{
2089
    char c1, c2;
2090
2091
    while (1) {
2092
        c1 = *s1++;
2093
        c2 = *s2++;
2094
        if (c1 == '\0' || c2 == '\0') {
2095
            if (c1 != '\0') return 1;
2096
            if (c2 != '\0') return -1;
2097
            return 0;
2098
        }
2099
        if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2100
        if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2101
        if (c1 != c2) {
2102
            if (c1 > c2)
2103
                return 1;
2104
            else
2105
                return -1;
2106
        }
2107
    }
2108
}
2109
2110
int
2111
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2112
{
2113
    char c1, c2;
2114
    size_t i;
2115
2116
    for (i = 0; i < n; i++) {
2117
        c1 = *s1++;
2118
        c2 = *s2++;
2119
        if (c1 == '\0' || c2 == '\0') {
2120
            if (c1 != '\0') return 1;
2121
            if (c2 != '\0') return -1;
2122
            return 0;
2123
        }
2124
        if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2125
        if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2126
        if (c1 != c2) {
2127
            if (c1 > c2)
2128
                return 1;
2129
            else
2130
                return -1;
2131
        }
2132
    }
2133
    return 0;
2134
}
2135
2136
NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2137
static st_index_t
2138
strcasehash(st_data_t arg)
2139
{
2140
    register const char *string = (const char *)arg;
2141
    register st_index_t hval = FNV1_32A_INIT;
2142
2143
    /*
2144
     * FNV-1a hash each octet in the buffer
2145
     */
2146
    while (*string) {
2147
  unsigned int c = (unsigned char)*string++;
2148
  if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2149
  hval ^= c;
2150
2151
  /* multiply by the 32 bit FNV magic prime mod 2^32 */
2152
  hval *= FNV_32_PRIME;
2153
    }
2154
    return hval;
2155
}
2156
2157
int
2158
st_numcmp(st_data_t x, st_data_t y)
2159
{
2160
    return x != y;
2161
}
2162
2163
st_index_t
2164
st_numhash(st_data_t n)
2165
{
2166
    enum {s1 = 11, s2 = 3};
2167
    return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2168
}
2169
2170
/* Expand TAB to be suitable for holding SIZ entries in total.
2171
   Pre-existing entries remain not deleted inside of TAB, but its bins
2172
   are cleared to expect future reconstruction. See rehash below. */
2173
static void
2174
st_expand_table(st_table *tab, st_index_t siz)
2175
{
2176
    st_table *tmp;
2177
    st_index_t n;
2178
2179
    if (siz <= get_allocated_entries(tab))
2180
        return; /* enough room already */
2181
2182
    tmp = st_init_table_with_size(tab->type, siz);
2183
    n = get_allocated_entries(tab);
2184
    MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2185
    free(tab->entries);
2186
    if (tab->bins != NULL)
2187
        free(tab->bins);
2188
    if (tmp->bins != NULL)
2189
        free(tmp->bins);
2190
    tab->entry_power = tmp->entry_power;
2191
    tab->bin_power = tmp->bin_power;
2192
    tab->size_ind = tmp->size_ind;
2193
    tab->entries = tmp->entries;
2194
    tab->bins = NULL;
2195
    tab->rebuilds_num++;
2196
    free(tmp);
2197
}
2198
2199
/* Rehash using linear search.  Return TRUE if we found that the table
2200
   was rebuilt.  */
2201
static int
2202
st_rehash_linear(st_table *tab)
2203
{
2204
    int eq_p, rebuilt_p;
2205
    st_index_t i, j;
2206
    st_table_entry *p, *q;
2207
    if (tab->bins) {
2208
        free(tab->bins);
2209
        tab->bins = NULL;
2210
    }
2211
    for (i = tab->entries_start; i < tab->entries_bound; i++) {
2212
        p = &tab->entries[i];
2213
        if (DELETED_ENTRY_P(p))
2214
            continue;
2215
        for (j = i + 1; j < tab->entries_bound; j++) {
2216
            q = &tab->entries[j];
2217
            if (DELETED_ENTRY_P(q))
2218
                continue;
2219
      DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2220
      if (EXPECT(rebuilt_p, 0))
2221
    return TRUE;
2222
      if (eq_p) {
2223
                st_assert(p < q);
2224
                *p = *q;
2225
                MARK_ENTRY_DELETED(q);
2226
                tab->num_entries--;
2227
                update_range_for_deleted(tab, j);
2228
            }
2229
        }
2230
    }
2231
    return FALSE;
2232
}
2233
2234
/* Rehash using index.  Return TRUE if we found that the table was
2235
   rebuilt.  */
2236
static int
2237
st_rehash_indexed(st_table *tab)
2238
{
2239
    int eq_p, rebuilt_p;
2240
    st_index_t i;
2241
    st_index_t const n = bins_size(tab);
2242
    unsigned int const size_ind = get_size_ind(tab);
2243
    st_index_t *bins = realloc(tab->bins, n);
2244
    st_assert(bins != NULL);
2245
    tab->bins = bins;
2246
    initialize_bins(tab);
2247
    for (i = tab->entries_start; i < tab->entries_bound; i++) {
2248
        st_table_entry *p = &tab->entries[i];
2249
        st_index_t ind;
2250
#ifdef QUADRATIC_PROBE
2251
        st_index_t d = 1;
2252
#else
2253
        st_index_t peterb = p->hash;
2254
#endif
2255
2256
        if (DELETED_ENTRY_P(p))
2257
            continue;
2258
2259
        ind = hash_bin(p->hash, tab);
2260
        for(;;) {
2261
            st_index_t bin = get_bin(bins, size_ind, ind);
2262
            if (EMPTY_OR_DELETED_BIN_P(bin)) {
2263
                /* ok, new room */
2264
                set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2265
                break;
2266
            }
2267
            else {
2268
                st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2269
    DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2270
    if (EXPECT(rebuilt_p, 0))
2271
        return TRUE;
2272
    if (eq_p) {
2273
        /* duplicated key; delete it */
2274
        st_assert(q < p);
2275
        q->record = p->record;
2276
        MARK_ENTRY_DELETED(p);
2277
        tab->num_entries--;
2278
        update_range_for_deleted(tab, bin);
2279
        break;
2280
    }
2281
    else {
2282
        /* hash collision; skip it */
2283
#ifdef QUADRATIC_PROBE
2284
        ind = hash_bin(ind + d, tab);
2285
        d++;
2286
#else
2287
        ind = secondary_hash(ind, tab, &peterb);
2288
#endif
2289
    }
2290
      }
2291
        }
2292
    }
2293
    return FALSE;
2294
}
2295
2296
/* Reconstruct TAB's bins according to TAB's entries. This function
2297
   permits conflicting keys inside of entries.  No errors are reported
2298
   then.  All but one of them are discarded silently. */
2299
static void
2300
st_rehash(st_table *tab)
2301
{
2302
    int rebuilt_p;
2303
2304
    do {
2305
  if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2306
      rebuilt_p = st_rehash_linear(tab);
2307
  else
2308
      rebuilt_p = st_rehash_indexed(tab);
2309
    } while (rebuilt_p);
2310
}
2311
2312
static st_data_t
2313
st_stringify(VALUE key)
2314
{
2315
    return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2316
        rb_hash_key_str(key) : key;
2317
}
2318
2319
static void
2320
st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2321
{
2322
    st_data_t k = st_stringify(key);
2323
    st_table_entry e;
2324
    e.hash = do_hash(k, tab);
2325
    e.key = k;
2326
    e.record = val;
2327
2328
    tab->entries[tab->entries_bound++] = e;
2329
    tab->num_entries++;
2330
    RB_OBJ_WRITTEN(hash, Qundef, k);
2331
    RB_OBJ_WRITTEN(hash, Qundef, val);
2332
}
2333
2334
static void
2335
st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2336
{
2337
    long i;
2338
2339
    for (i = 0; i < argc; /* */) {
2340
        st_data_t k = st_stringify(argv[i++]);
2341
        st_data_t v = argv[i++];
2342
        st_insert(tab, k, v);
2343
        RB_OBJ_WRITTEN(hash, Qundef, k);
2344
        RB_OBJ_WRITTEN(hash, Qundef, v);
2345
    }
2346
}
2347
2348
static void
2349
st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2350
{
2351
    long i;
2352
2353
    /* push elems */
2354
    for (i = 0; i < argc; /* */) {
2355
        VALUE key = argv[i++];
2356
        VALUE val = argv[i++];
2357
        st_insert_single(tab, hash, key, val);
2358
    }
2359
2360
    /* reindex */
2361
    st_rehash(tab);
2362
}
2363
2364
/* Mimics ruby's { foo => bar } syntax. This function is subpart
2365
   of rb_hash_bulk_insert. */
2366
void
2367
rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2368
{
2369
    st_index_t n, size = argc / 2;
2370
    st_table *tab = RHASH_ST_TABLE(hash);
2371
2372
    tab = RHASH_TBL_RAW(hash);
2373
    n = tab->entries_bound + size;
2374
    st_expand_table(tab, n);
2375
    if (UNLIKELY(tab->num_entries))
2376
        st_insert_generic(tab, argc, argv, hash);
2377
    else if (argc <= 2)
2378
        st_insert_single(tab, hash, argv[0], argv[1]);
2379
    else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2380
        st_insert_linear(tab, argc, argv, hash);
2381
    else
2382
        st_insert_generic(tab, argc, argv, hash);
2383
}
2384
#endif /* RUBY */