Coverage Report

Created: 2025-01-28 06:17

/src/mupdf/source/fitz/hash.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
#include <assert.h>
27
28
/*
29
  Simple hashtable with open addressing linear probe.
30
  Unlike text book examples, removing entries works
31
  correctly in this implementation, so it won't start
32
  exhibiting bad behaviour if entries are inserted
33
  and removed frequently.
34
*/
35
36
typedef struct
37
{
38
  unsigned char key[FZ_HASH_TABLE_KEY_LENGTH];
39
  void *val;
40
} fz_hash_entry;
41
42
struct fz_hash_table
43
{
44
  int keylen;
45
  int size;
46
  int load;
47
  int lock; /* -1 or the lock used to protect this hash table */
48
  fz_hash_table_drop_fn *drop_val;
49
  fz_hash_entry *ents;
50
};
51
52
static unsigned hash(const unsigned char *s, int len)
53
6.35M
{
54
6.35M
  unsigned val = 0;
55
6.35M
  int i;
56
127M
  for (i = 0; i < len; i++)
57
120M
  {
58
120M
    val += s[i];
59
120M
    val += (val << 10);
60
120M
    val ^= (val >> 6);
61
120M
  }
62
6.35M
  val += (val << 3);
63
6.35M
  val ^= (val >> 11);
64
6.35M
  val += (val << 15);
65
6.35M
  return val;
66
6.35M
}
67
68
fz_hash_table *
69
fz_new_hash_table(fz_context *ctx, int initialsize, int keylen, int lock, fz_hash_table_drop_fn *drop_val)
70
45.1k
{
71
45.1k
  fz_hash_table *table;
72
73
45.1k
  if (keylen > FZ_HASH_TABLE_KEY_LENGTH)
74
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "hash table key length too large");
75
76
45.1k
  table = fz_malloc_struct(ctx, fz_hash_table);
77
45.1k
  table->keylen = keylen;
78
45.1k
  table->size = initialsize;
79
45.1k
  table->load = 0;
80
45.1k
  table->lock = lock;
81
45.1k
  table->drop_val = drop_val;
82
90.3k
  fz_try(ctx)
83
90.3k
  {
84
45.1k
    table->ents = Memento_label(fz_malloc_array(ctx, table->size, fz_hash_entry), "hash_entries");
85
45.1k
    memset(table->ents, 0, sizeof(fz_hash_entry) * table->size);
86
45.1k
  }
87
90.3k
  fz_catch(ctx)
88
0
  {
89
0
    fz_free(ctx, table);
90
0
    fz_rethrow(ctx);
91
0
  }
92
93
45.1k
  return table;
94
45.1k
}
95
96
void
97
fz_drop_hash_table(fz_context *ctx, fz_hash_table *table)
98
55.0k
{
99
55.0k
  if (!table)
100
9.81k
    return;
101
102
45.1k
  if (table->drop_val)
103
31.7k
  {
104
31.7k
    int i, n = table->size;
105
11.5M
    for (i = 0; i < n; ++i)
106
11.5M
    {
107
11.5M
      void *v = table->ents[i].val;
108
11.5M
      if (v)
109
668k
        table->drop_val(ctx, v);
110
11.5M
    }
111
31.7k
  }
112
113
45.1k
  fz_free(ctx, table->ents);
114
45.1k
  fz_free(ctx, table);
115
45.1k
}
116
117
static void *
118
do_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
119
1.75M
{
120
1.75M
  fz_hash_entry *ents;
121
1.75M
  unsigned size;
122
1.75M
  unsigned pos;
123
124
1.75M
  ents = table->ents;
125
1.75M
  size = table->size;
126
1.75M
  pos = hash(key, table->keylen) % size;
127
128
1.75M
  if (table->lock >= 0)
129
85.0k
    fz_assert_lock_held(ctx, table->lock);
130
131
4.18M
  while (1)
132
4.18M
  {
133
4.18M
    if (!ents[pos].val)
134
1.75M
    {
135
1.75M
      memcpy(ents[pos].key, key, table->keylen);
136
1.75M
      ents[pos].val = val;
137
1.75M
      table->load ++;
138
1.75M
      return NULL;
139
1.75M
    }
140
141
2.42M
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
142
0
    {
143
      /* This is legal, but should rarely happen. */
144
0
      return ents[pos].val;
145
0
    }
146
147
2.42M
    pos = (pos + 1) % size;
148
2.42M
  }
149
1.75M
}
150
151
/* Entered with the lock taken, held throughout and at exit, UNLESS the lock
152
 * is the alloc lock in which case it may be momentarily dropped. */
153
static void
154
fz_resize_hash(fz_context *ctx, fz_hash_table *table, int newsize)
155
469
{
156
469
  fz_hash_entry *oldents = table->ents;
157
469
  fz_hash_entry *newents;
158
469
  int oldsize = table->size;
159
469
  int oldload = table->load;
160
469
  int i;
161
162
469
  if (newsize < oldload * 8 / 10)
163
0
  {
164
0
    fz_warn(ctx, "assert: resize hash too small");
165
0
    return;
166
0
  }
167
168
469
  if (table->lock == FZ_LOCK_ALLOC)
169
0
    fz_unlock(ctx, table->lock);
170
469
  newents = Memento_label(fz_malloc_no_throw(ctx, newsize * sizeof (fz_hash_entry)), "hash_entries");
171
469
  if (table->lock == FZ_LOCK_ALLOC)
172
0
    fz_lock(ctx, table->lock);
173
469
  if (table->lock >= 0)
174
0
  {
175
0
    if (table->size >= newsize)
176
0
    {
177
      /* Someone else fixed it before we could lock! */
178
0
      if (table->lock == FZ_LOCK_ALLOC)
179
0
        fz_unlock(ctx, table->lock);
180
0
      fz_free(ctx, newents);
181
0
      if (table->lock == FZ_LOCK_ALLOC)
182
0
        fz_lock(ctx, table->lock);
183
0
      return;
184
0
    }
185
0
  }
186
469
  if (newents == NULL)
187
0
    fz_throw(ctx, FZ_ERROR_SYSTEM, "hash table resize failed; out of memory (%d entries)", newsize);
188
469
  table->ents = newents;
189
469
  memset(table->ents, 0, sizeof(fz_hash_entry) * newsize);
190
469
  table->size = newsize;
191
469
  table->load = 0;
192
193
1.25M
  for (i = 0; i < oldsize; i++)
194
1.25M
  {
195
1.25M
    if (oldents[i].val)
196
1.00M
    {
197
1.00M
      do_hash_insert(ctx, table, oldents[i].key, oldents[i].val);
198
1.00M
    }
199
1.25M
  }
200
201
469
  if (table->lock == FZ_LOCK_ALLOC)
202
0
    fz_unlock(ctx, table->lock);
203
469
  fz_free(ctx, oldents);
204
469
  if (table->lock == FZ_LOCK_ALLOC)
205
0
    fz_lock(ctx, table->lock);
206
469
}
207
208
void *
209
fz_hash_find(fz_context *ctx, fz_hash_table *table, const void *key)
210
4.51M
{
211
4.51M
  fz_hash_entry *ents = table->ents;
212
4.51M
  unsigned size = table->size;
213
4.51M
  unsigned pos = hash(key, table->keylen) % size;
214
215
4.51M
  if (table->lock >= 0)
216
756k
    fz_assert_lock_held(ctx, table->lock);
217
218
13.1M
  while (1)
219
13.1M
  {
220
13.1M
    if (!ents[pos].val)
221
825k
      return NULL;
222
223
12.3M
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
224
3.68M
      return ents[pos].val;
225
226
8.65M
    pos = (pos + 1) % size;
227
8.65M
  }
228
4.51M
}
229
230
void *
231
fz_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
232
753k
{
233
753k
  if (table->load > table->size * 8 / 10)
234
469
    fz_resize_hash(ctx, table, table->size * 2);
235
753k
  return do_hash_insert(ctx, table, key, val);
236
753k
}
237
238
static void
239
do_removal(fz_context *ctx, fz_hash_table *table, unsigned hole)
240
85.0k
{
241
85.0k
  fz_hash_entry *ents = table->ents;
242
85.0k
  unsigned size = table->size;
243
85.0k
  unsigned look, code;
244
245
85.0k
  if (table->lock >= 0)
246
85.0k
    fz_assert_lock_held(ctx, table->lock);
247
248
85.0k
  ents[hole].val = NULL;
249
250
85.0k
  look = hole + 1;
251
85.0k
  if (look == size)
252
16
    look = 0;
253
254
91.4k
  while (ents[look].val)
255
6.30k
  {
256
6.30k
    code = hash(ents[look].key, table->keylen) % size;
257
6.30k
    if ((code <= hole && hole < look) ||
258
6.30k
      (look < code && code <= hole) ||
259
6.30k
      (hole < look && look < code))
260
2.84k
    {
261
2.84k
      ents[hole] = ents[look];
262
2.84k
      ents[look].val = NULL;
263
2.84k
      hole = look;
264
2.84k
    }
265
266
6.30k
    look++;
267
6.30k
    if (look == size)
268
3
      look = 0;
269
6.30k
  }
270
271
85.0k
  table->load --;
272
85.0k
}
273
274
void
275
fz_hash_remove(fz_context *ctx, fz_hash_table *table, const void *key)
276
85.0k
{
277
85.0k
  fz_hash_entry *ents = table->ents;
278
85.0k
  unsigned size = table->size;
279
85.0k
  unsigned pos = hash(key, table->keylen) % size;
280
281
85.0k
  if (table->lock >= 0)
282
85.0k
    fz_assert_lock_held(ctx, table->lock);
283
284
85.3k
  while (1)
285
85.3k
  {
286
85.3k
    if (!ents[pos].val)
287
0
    {
288
0
      fz_warn(ctx, "assert: remove non-existent hash entry");
289
0
      return;
290
0
    }
291
292
85.3k
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
293
85.0k
    {
294
85.0k
      do_removal(ctx, table, pos);
295
85.0k
      return;
296
85.0k
    }
297
298
294
    pos++;
299
294
    if (pos == size)
300
0
      pos = 0;
301
294
  }
302
85.0k
}
303
304
void
305
fz_hash_for_each(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_for_each_fn *callback)
306
0
{
307
0
  int i;
308
0
  for (i = 0; i < table->size; ++i)
309
0
    if (table->ents[i].val)
310
0
      callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val);
311
0
}
312
313
void
314
fz_hash_filter(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_filter_fn *callback)
315
6
{
316
6
  int i;
317
12
restart:
318
45.5k
  for (i = 0; i < table->size; ++i)
319
45.5k
  {
320
45.5k
    if (table->ents[i].val)
321
6
    {
322
6
      if (callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val))
323
6
      {
324
6
        do_removal(ctx, table, i);
325
6
        goto restart; /* we may have moved some slots around, so just restart the scan */
326
6
      }
327
6
    }
328
45.5k
  }
329
12
}