Coverage Report

Created: 2025-03-06 07:58

/src/gnutls/gl/hash.c
Every instrumented line in this file has an execution count of 0.
/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2025 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty; there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize', see the documentation
       block for this function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  An entry indistinctly
   refers to both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size minus 1.  At each slot position in the hash
   table starts a linked chain of entries whose user data all hash to this
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a given datum is
   usually done in constant time by the "hasher", and the later finding of a
   precise entry is linear in time with the size of the bucket.  Consequently,
   a larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but this can
   become wasteful, as unused slots in the hash table take up space.  The
   best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size
   larger than the actual number of entries.  */

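/* An illustrative usage sketch (editorial example, not part of the
   original file): create a table keyed on pointer identity, insert an
   entry, look it up again, and dispose of the table.  Passing NULL for
   the hasher and comparator selects the raw_hasher and raw_comparator
   defaults defined further below; "obj" is a hypothetical user object.  */
#if 0  /* example only */
static void
example_usage (void *obj)
{
  Hash_table *ht = hash_initialize (10, NULL, NULL, NULL, NULL);
  if (ht)
    {
      if (hash_insert (ht, obj) == obj      /* freshly inserted */
          && hash_lookup (ht, obj) == obj)  /* and found again */
        {
          /* ... use the table ... */
        }
      hash_free (ht);
    }
}
#endif
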
/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };

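/* An illustrative sketch (editorial example, not part of the original
   file): a caller that wants candidate sizes interpreted as exact bucket
   counts, and a table that also shrinks, could pass a tuning like this
   to hash_initialize.  The field order matches the initializer above:
   shrink threshold, shrink factor, growth threshold, growth factor,
   is_n_buckets.  These values satisfy the constraints enforced by
   check_tuning below.  */
#if 0  /* example only */
static const Hash_tuning example_tuning =
  {
    0.3f,    /* shrink when fewer than 30% of buckets are used...  */
    0.5f,    /* ...by halving the number of buckets */
    0.8f,    /* grow when more than 80% of buckets are used...  */
    1.414f,  /* ...by multiplying the number of buckets by sqrt(2) */
    true     /* candidate sizes are exact bucket counts */
  };
#endif
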
/* Information and lookup.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t n_buckets_used = 0;
  size_t n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}

/* Hash KEY and return a pointer to the selected bucket.
   If TABLE->hasher misbehaves, abort.  */
static struct hash_entry *
safe_hasher (const Hash_table *table, const void *key)
{
  size_t n = table->hasher (key, table->n_buckets);
  if (! (n < table->n_buckets))
    abort ();
  return table->bucket + n;
}

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (entry == cursor->data || table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}

/* Walking.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry const *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; ; bucket++)
    if (! (bucket < table->bucket_limit))
      abort ();
    else if (bucket->data)
      return bucket->data;
}

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  /* Find next entry in the same bucket.  */
  cursor = bucket;
  do
    {
      if (cursor->data == entry && cursor->next)
        return cursor->next->data;
      cursor = cursor->next;
    }
  while (cursor != NULL);

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}

size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}

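/* An illustrative sketch (editorial example, not part of the original
   file): a Hash_processor callback for hash_do_for_each that counts the
   entries it visits.  Returning true continues the walk; returning
   false stops it early.  */
#if 0  /* example only */
static bool
count_entry (void *data, void *processor_data)
{
  size_t *counter = processor_data;
  (*counter)++;
  return true;  /* keep walking */
}

/* Typical call site:
     size_t n = 0;
     hash_do_for_each (table, count_entry, &n);  */
#endif
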
/* Allocation and clean-up.  */

#if USE_DIFF_HASH

/* About hashing, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber 'snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */

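/* An illustrative sketch (editorial example, not part of the original
   file): hash_string can serve as the basis of a hasher for
   NUL-terminated string keys, paired with a strcmp-based comparator,
   when calling hash_initialize.  The comparator would require
   <string.h>, which this file does not otherwise include.  */
#if 0  /* example only */
static size_t
string_hasher (const void *entry, size_t n_buckets)
{
  return hash_string (entry, n_buckets);
}

static bool
string_comparator (const void *a, const void *b)
{
  return strcmp (a, b) == 0;
}
#endif
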
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}


/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right of further modifying
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        goto nomem;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    goto nomem;
  return candidate;

 nomem:
  errno = ENOMEM;
  return 0;
}

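/* A worked example of the computation above (editorial note): with the
   default tuning, a CANDIDATE of 100 entries is first divided by the
   0.8 growth threshold, giving 125, which next_prime then rounds up to
   127 buckets.  */
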
Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is created,
         if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
         options.  */
      errno = EINVAL;
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  int err = errno;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);

  errno = err;
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for later
                 recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false (setting errno) if the free entry list is
   exhausted and an allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }
        /* Now move the bucket head.  Be sure that if we fail due to
           allocation failure that the src table is in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space into the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  int err = errno;
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  errno = err;
  return false;
}

int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries:  if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            {
              errno = ENOMEM;
              return -1;
            }

          /* If the rehash fails, arrange to return -1.  */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched, it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}

void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket->data)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */