Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/fitz/hash.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
#include <assert.h>
27
28
/*
29
  Simple hashtable with open addressing linear probe.
30
  Unlike text book examples, removing entries works
31
  correctly in this implementation, so it won't start
32
  exhibiting bad behaviour if entries are inserted
33
  and removed frequently.
34
*/
35
36
typedef struct
37
{
38
  unsigned char key[FZ_HASH_TABLE_KEY_LENGTH];
39
  void *val;
40
} fz_hash_entry;
41
42
struct fz_hash_table
43
{
44
  int keylen;
45
  int size;
46
  int load;
47
  int lock; /* -1 or the lock used to protect this hash table */
48
  fz_hash_table_drop_fn *drop_val;
49
  fz_hash_entry *ents;
50
};
51
52
static unsigned hash(const unsigned char *s, int len)
53
5.25M
{
54
5.25M
  unsigned val = 0;
55
5.25M
  int i;
56
103M
  for (i = 0; i < len; i++)
57
98.4M
  {
58
98.4M
    val += s[i];
59
98.4M
    val += (val << 10);
60
98.4M
    val ^= (val >> 6);
61
98.4M
  }
62
5.25M
  val += (val << 3);
63
5.25M
  val ^= (val >> 11);
64
5.25M
  val += (val << 15);
65
5.25M
  return val;
66
5.25M
}
67
68
fz_hash_table *
69
fz_new_hash_table(fz_context *ctx, int initialsize, int keylen, int lock, fz_hash_table_drop_fn *drop_val)
70
79.2k
{
71
79.2k
  fz_hash_table *table;
72
73
79.2k
  if (keylen > FZ_HASH_TABLE_KEY_LENGTH)
74
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "hash table key length too large");
75
76
79.2k
  table = fz_malloc_struct(ctx, fz_hash_table);
77
79.2k
  table->keylen = keylen;
78
79.2k
  table->size = initialsize;
79
79.2k
  table->load = 0;
80
79.2k
  table->lock = lock;
81
79.2k
  table->drop_val = drop_val;
82
158k
  fz_try(ctx)
83
158k
  {
84
79.2k
    table->ents = Memento_label(fz_malloc_array(ctx, table->size, fz_hash_entry), "hash_entries");
85
79.2k
    memset(table->ents, 0, sizeof(fz_hash_entry) * table->size);
86
79.2k
  }
87
158k
  fz_catch(ctx)
88
0
  {
89
0
    fz_free(ctx, table);
90
0
    fz_rethrow(ctx);
91
0
  }
92
93
79.2k
  return table;
94
79.2k
}
95
96
void
97
fz_drop_hash_table(fz_context *ctx, fz_hash_table *table)
98
90.8k
{
99
90.8k
  if (!table)
100
11.5k
    return;
101
102
79.2k
  if (table->drop_val)
103
63.6k
  {
104
63.6k
    int i, n = table->size;
105
18.3M
    for (i = 0; i < n; ++i)
106
18.2M
    {
107
18.2M
      void *v = table->ents[i].val;
108
18.2M
      if (v)
109
507k
        table->drop_val(ctx, v);
110
18.2M
    }
111
63.6k
  }
112
113
79.2k
  fz_free(ctx, table->ents);
114
79.2k
  fz_free(ctx, table);
115
79.2k
}
116
117
static void *
118
do_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
119
1.24M
{
120
1.24M
  fz_hash_entry *ents;
121
1.24M
  unsigned size;
122
1.24M
  unsigned pos;
123
124
1.24M
  ents = table->ents;
125
1.24M
  size = table->size;
126
1.24M
  pos = hash(key, table->keylen) % size;
127
128
1.24M
  if (table->lock >= 0)
129
59.9k
    fz_assert_lock_held(ctx, table->lock);
130
131
3.00M
  while (1)
132
3.00M
  {
133
3.00M
    if (!ents[pos].val)
134
1.24M
    {
135
1.24M
      memcpy(ents[pos].key, key, table->keylen);
136
1.24M
      ents[pos].val = val;
137
1.24M
      table->load ++;
138
1.24M
      return NULL;
139
1.24M
    }
140
141
1.75M
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
142
0
    {
143
      /* This is legal, but should rarely happen. */
144
0
      return ents[pos].val;
145
0
    }
146
147
1.75M
    pos = (pos + 1) % size;
148
1.75M
  }
149
1.24M
}
150
151
/* Entered with the lock taken, held throughout and at exit, UNLESS the lock
152
 * is the alloc lock in which case it may be momentarily dropped. */
153
static void
154
fz_resize_hash(fz_context *ctx, fz_hash_table *table, int newsize)
155
485
{
156
485
  fz_hash_entry *oldents = table->ents;
157
485
  fz_hash_entry *newents;
158
485
  int oldsize = table->size;
159
485
  int oldload = table->load;
160
485
  int i;
161
162
485
  if (newsize < oldload * 8 / 10)
163
0
  {
164
0
    fz_warn(ctx, "assert: resize hash too small");
165
0
    return;
166
0
  }
167
168
485
  if (table->lock == FZ_LOCK_ALLOC)
169
0
    fz_unlock(ctx, table->lock);
170
485
  newents = fz_malloc_no_throw(ctx, newsize * sizeof (fz_hash_entry));
171
485
  if (table->lock == FZ_LOCK_ALLOC)
172
0
    fz_lock(ctx, table->lock);
173
485
  if (table->lock >= 0)
174
0
  {
175
0
    if (table->size >= newsize)
176
0
    {
177
      /* Someone else fixed it before we could lock! */
178
0
      if (table->lock == FZ_LOCK_ALLOC)
179
0
        fz_unlock(ctx, table->lock);
180
0
      fz_free(ctx, newents);
181
0
      if (table->lock == FZ_LOCK_ALLOC)
182
0
        fz_lock(ctx, table->lock);
183
0
      return;
184
0
    }
185
0
  }
186
485
  if (newents == NULL)
187
0
    fz_throw(ctx, FZ_ERROR_SYSTEM, "hash table resize failed; out of memory (%d entries)", newsize);
188
485
  table->ents = newents;
189
485
  memset(table->ents, 0, sizeof(fz_hash_entry) * newsize);
190
485
  table->size = newsize;
191
485
  table->load = 0;
192
193
850k
  for (i = 0; i < oldsize; i++)
194
849k
  {
195
849k
    if (oldents[i].val)
196
680k
    {
197
680k
      do_hash_insert(ctx, table, oldents[i].key, oldents[i].val);
198
680k
    }
199
849k
  }
200
201
485
  if (table->lock == FZ_LOCK_ALLOC)
202
0
    fz_unlock(ctx, table->lock);
203
485
  fz_free(ctx, oldents);
204
485
  if (table->lock == FZ_LOCK_ALLOC)
205
0
    fz_lock(ctx, table->lock);
206
485
}
207
208
void *
209
fz_hash_find(fz_context *ctx, fz_hash_table *table, const void *key)
210
3.94M
{
211
3.94M
  fz_hash_entry *ents = table->ents;
212
3.94M
  unsigned size = table->size;
213
3.94M
  unsigned pos = hash(key, table->keylen) % size;
214
215
3.94M
  if (table->lock >= 0)
216
690k
    fz_assert_lock_held(ctx, table->lock);
217
218
10.5M
  while (1)
219
10.5M
  {
220
10.5M
    if (!ents[pos].val)
221
626k
      return NULL;
222
223
9.91M
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
224
3.32M
      return ents[pos].val;
225
226
6.59M
    pos = (pos + 1) % size;
227
6.59M
  }
228
3.94M
}
229
230
void *
231
fz_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
232
567k
{
233
567k
  if (table->load > table->size * 8 / 10)
234
485
    fz_resize_hash(ctx, table, table->size * 2);
235
567k
  return do_hash_insert(ctx, table, key, val);
236
567k
}
237
238
static void
239
do_removal(fz_context *ctx, fz_hash_table *table, unsigned hole)
240
59.9k
{
241
59.9k
  fz_hash_entry *ents = table->ents;
242
59.9k
  unsigned size = table->size;
243
59.9k
  unsigned look, code;
244
245
59.9k
  if (table->lock >= 0)
246
59.9k
    fz_assert_lock_held(ctx, table->lock);
247
248
59.9k
  ents[hole].val = NULL;
249
250
59.9k
  look = hole + 1;
251
59.9k
  if (look == size)
252
4
    look = 0;
253
254
63.6k
  while (ents[look].val)
255
3.72k
  {
256
3.72k
    code = hash(ents[look].key, table->keylen) % size;
257
3.72k
    if ((code <= hole && hole < look) ||
258
3.72k
      (look < code && code <= hole) ||
259
3.72k
      (hole < look && look < code))
260
1.63k
    {
261
1.63k
      ents[hole] = ents[look];
262
1.63k
      ents[look].val = NULL;
263
1.63k
      hole = look;
264
1.63k
    }
265
266
3.72k
    look++;
267
3.72k
    if (look == size)
268
0
      look = 0;
269
3.72k
  }
270
271
59.9k
  table->load --;
272
59.9k
}
273
274
void
275
fz_hash_remove(fz_context *ctx, fz_hash_table *table, const void *key)
276
59.9k
{
277
59.9k
  fz_hash_entry *ents = table->ents;
278
59.9k
  unsigned size = table->size;
279
59.9k
  unsigned pos = hash(key, table->keylen) % size;
280
281
59.9k
  if (table->lock >= 0)
282
59.9k
    fz_assert_lock_held(ctx, table->lock);
283
284
60.0k
  while (1)
285
60.0k
  {
286
60.0k
    if (!ents[pos].val)
287
0
    {
288
0
      fz_warn(ctx, "assert: remove non-existent hash entry");
289
0
      return;
290
0
    }
291
292
60.0k
    if (memcmp(key, ents[pos].key, table->keylen) == 0)
293
59.9k
    {
294
59.9k
      do_removal(ctx, table, pos);
295
59.9k
      return;
296
59.9k
    }
297
298
143
    pos++;
299
143
    if (pos == size)
300
0
      pos = 0;
301
143
  }
302
59.9k
}
303
304
void
305
fz_hash_for_each(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_for_each_fn *callback)
306
0
{
307
0
  int i;
308
0
  for (i = 0; i < table->size; ++i)
309
0
    if (table->ents[i].val)
310
0
      callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val);
311
0
}
312
313
void
314
fz_hash_filter(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_filter_fn *callback)
315
6
{
316
6
  int i;
317
12
restart:
318
43.9k
  for (i = 0; i < table->size; ++i)
319
43.8k
  {
320
43.8k
    if (table->ents[i].val)
321
6
    {
322
6
      if (callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val))
323
6
      {
324
6
        do_removal(ctx, table, i);
325
6
        goto restart; /* we may have moved some slots around, so just restart the scan */
326
6
      }
327
6
    }
328
43.8k
  }
329
12
}