/src/mupdf/source/fitz/hash.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | #include <assert.h> |
27 | | |
28 | | /* |
29 | | Simple hashtable with open addressing linear probe. |
30 | | Unlike text book examples, removing entries works |
31 | | correctly in this implementation, so it won't start |
32 | | exhibiting bad behaviour if entries are inserted |
33 | | and removed frequently. |
34 | | */ |
35 | | |
36 | | typedef struct |
37 | | { |
38 | | unsigned char key[FZ_HASH_TABLE_KEY_LENGTH]; |
39 | | void *val; |
40 | | } fz_hash_entry; |
41 | | |
42 | | struct fz_hash_table |
43 | | { |
44 | | int keylen; |
45 | | int size; |
46 | | int load; |
47 | | int lock; /* -1 or the lock used to protect this hash table */ |
48 | | fz_hash_table_drop_fn *drop_val; |
49 | | fz_hash_entry *ents; |
50 | | }; |
51 | | |
52 | | static unsigned hash(const unsigned char *s, int len) |
53 | 5.25M | { |
54 | 5.25M | unsigned val = 0; |
55 | 5.25M | int i; |
56 | 103M | for (i = 0; i < len; i++) |
57 | 98.4M | { |
58 | 98.4M | val += s[i]; |
59 | 98.4M | val += (val << 10); |
60 | 98.4M | val ^= (val >> 6); |
61 | 98.4M | } |
62 | 5.25M | val += (val << 3); |
63 | 5.25M | val ^= (val >> 11); |
64 | 5.25M | val += (val << 15); |
65 | 5.25M | return val; |
66 | 5.25M | } |
67 | | |
68 | | fz_hash_table * |
69 | | fz_new_hash_table(fz_context *ctx, int initialsize, int keylen, int lock, fz_hash_table_drop_fn *drop_val) |
70 | 79.2k | { |
71 | 79.2k | fz_hash_table *table; |
72 | | |
73 | 79.2k | if (keylen > FZ_HASH_TABLE_KEY_LENGTH) |
74 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "hash table key length too large"); |
75 | | |
76 | 79.2k | table = fz_malloc_struct(ctx, fz_hash_table); |
77 | 79.2k | table->keylen = keylen; |
78 | 79.2k | table->size = initialsize; |
79 | 79.2k | table->load = 0; |
80 | 79.2k | table->lock = lock; |
81 | 79.2k | table->drop_val = drop_val; |
82 | 158k | fz_try(ctx) |
83 | 158k | { |
84 | 79.2k | table->ents = Memento_label(fz_malloc_array(ctx, table->size, fz_hash_entry), "hash_entries"); |
85 | 79.2k | memset(table->ents, 0, sizeof(fz_hash_entry) * table->size); |
86 | 79.2k | } |
87 | 158k | fz_catch(ctx) |
88 | 0 | { |
89 | 0 | fz_free(ctx, table); |
90 | 0 | fz_rethrow(ctx); |
91 | 0 | } |
92 | | |
93 | 79.2k | return table; |
94 | 79.2k | } |
95 | | |
96 | | void |
97 | | fz_drop_hash_table(fz_context *ctx, fz_hash_table *table) |
98 | 90.8k | { |
99 | 90.8k | if (!table) |
100 | 11.5k | return; |
101 | | |
102 | 79.2k | if (table->drop_val) |
103 | 63.6k | { |
104 | 63.6k | int i, n = table->size; |
105 | 18.3M | for (i = 0; i < n; ++i) |
106 | 18.2M | { |
107 | 18.2M | void *v = table->ents[i].val; |
108 | 18.2M | if (v) |
109 | 507k | table->drop_val(ctx, v); |
110 | 18.2M | } |
111 | 63.6k | } |
112 | | |
113 | 79.2k | fz_free(ctx, table->ents); |
114 | 79.2k | fz_free(ctx, table); |
115 | 79.2k | } |
116 | | |
117 | | static void * |
118 | | do_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val) |
119 | 1.24M | { |
120 | 1.24M | fz_hash_entry *ents; |
121 | 1.24M | unsigned size; |
122 | 1.24M | unsigned pos; |
123 | | |
124 | 1.24M | ents = table->ents; |
125 | 1.24M | size = table->size; |
126 | 1.24M | pos = hash(key, table->keylen) % size; |
127 | | |
128 | 1.24M | if (table->lock >= 0) |
129 | 59.9k | fz_assert_lock_held(ctx, table->lock); |
130 | | |
131 | 3.00M | while (1) |
132 | 3.00M | { |
133 | 3.00M | if (!ents[pos].val) |
134 | 1.24M | { |
135 | 1.24M | memcpy(ents[pos].key, key, table->keylen); |
136 | 1.24M | ents[pos].val = val; |
137 | 1.24M | table->load ++; |
138 | 1.24M | return NULL; |
139 | 1.24M | } |
140 | | |
141 | 1.75M | if (memcmp(key, ents[pos].key, table->keylen) == 0) |
142 | 0 | { |
143 | | /* This is legal, but should rarely happen. */ |
144 | 0 | return ents[pos].val; |
145 | 0 | } |
146 | | |
147 | 1.75M | pos = (pos + 1) % size; |
148 | 1.75M | } |
149 | 1.24M | } |
150 | | |
151 | | /* Entered with the lock taken, held throughout and at exit, UNLESS the lock |
152 | | * is the alloc lock in which case it may be momentarily dropped. */ |
153 | | static void |
154 | | fz_resize_hash(fz_context *ctx, fz_hash_table *table, int newsize) |
155 | 485 | { |
156 | 485 | fz_hash_entry *oldents = table->ents; |
157 | 485 | fz_hash_entry *newents; |
158 | 485 | int oldsize = table->size; |
159 | 485 | int oldload = table->load; |
160 | 485 | int i; |
161 | | |
162 | 485 | if (newsize < oldload * 8 / 10) |
163 | 0 | { |
164 | 0 | fz_warn(ctx, "assert: resize hash too small"); |
165 | 0 | return; |
166 | 0 | } |
167 | | |
168 | 485 | if (table->lock == FZ_LOCK_ALLOC) |
169 | 0 | fz_unlock(ctx, table->lock); |
170 | 485 | newents = fz_malloc_no_throw(ctx, newsize * sizeof (fz_hash_entry)); |
171 | 485 | if (table->lock == FZ_LOCK_ALLOC) |
172 | 0 | fz_lock(ctx, table->lock); |
173 | 485 | if (table->lock >= 0) |
174 | 0 | { |
175 | 0 | if (table->size >= newsize) |
176 | 0 | { |
177 | | /* Someone else fixed it before we could lock! */ |
178 | 0 | if (table->lock == FZ_LOCK_ALLOC) |
179 | 0 | fz_unlock(ctx, table->lock); |
180 | 0 | fz_free(ctx, newents); |
181 | 0 | if (table->lock == FZ_LOCK_ALLOC) |
182 | 0 | fz_lock(ctx, table->lock); |
183 | 0 | return; |
184 | 0 | } |
185 | 0 | } |
186 | 485 | if (newents == NULL) |
187 | 0 | fz_throw(ctx, FZ_ERROR_SYSTEM, "hash table resize failed; out of memory (%d entries)", newsize); |
188 | 485 | table->ents = newents; |
189 | 485 | memset(table->ents, 0, sizeof(fz_hash_entry) * newsize); |
190 | 485 | table->size = newsize; |
191 | 485 | table->load = 0; |
192 | | |
193 | 850k | for (i = 0; i < oldsize; i++) |
194 | 849k | { |
195 | 849k | if (oldents[i].val) |
196 | 680k | { |
197 | 680k | do_hash_insert(ctx, table, oldents[i].key, oldents[i].val); |
198 | 680k | } |
199 | 849k | } |
200 | | |
201 | 485 | if (table->lock == FZ_LOCK_ALLOC) |
202 | 0 | fz_unlock(ctx, table->lock); |
203 | 485 | fz_free(ctx, oldents); |
204 | 485 | if (table->lock == FZ_LOCK_ALLOC) |
205 | 0 | fz_lock(ctx, table->lock); |
206 | 485 | } |
207 | | |
208 | | void * |
209 | | fz_hash_find(fz_context *ctx, fz_hash_table *table, const void *key) |
210 | 3.94M | { |
211 | 3.94M | fz_hash_entry *ents = table->ents; |
212 | 3.94M | unsigned size = table->size; |
213 | 3.94M | unsigned pos = hash(key, table->keylen) % size; |
214 | | |
215 | 3.94M | if (table->lock >= 0) |
216 | 690k | fz_assert_lock_held(ctx, table->lock); |
217 | | |
218 | 10.5M | while (1) |
219 | 10.5M | { |
220 | 10.5M | if (!ents[pos].val) |
221 | 626k | return NULL; |
222 | | |
223 | 9.91M | if (memcmp(key, ents[pos].key, table->keylen) == 0) |
224 | 3.32M | return ents[pos].val; |
225 | | |
226 | 6.59M | pos = (pos + 1) % size; |
227 | 6.59M | } |
228 | 3.94M | } |
229 | | |
230 | | void * |
231 | | fz_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val) |
232 | 567k | { |
233 | 567k | if (table->load > table->size * 8 / 10) |
234 | 485 | fz_resize_hash(ctx, table, table->size * 2); |
235 | 567k | return do_hash_insert(ctx, table, key, val); |
236 | 567k | } |
237 | | |
238 | | static void |
239 | | do_removal(fz_context *ctx, fz_hash_table *table, unsigned hole) |
240 | 59.9k | { |
241 | 59.9k | fz_hash_entry *ents = table->ents; |
242 | 59.9k | unsigned size = table->size; |
243 | 59.9k | unsigned look, code; |
244 | | |
245 | 59.9k | if (table->lock >= 0) |
246 | 59.9k | fz_assert_lock_held(ctx, table->lock); |
247 | | |
248 | 59.9k | ents[hole].val = NULL; |
249 | | |
250 | 59.9k | look = hole + 1; |
251 | 59.9k | if (look == size) |
252 | 4 | look = 0; |
253 | | |
254 | 63.6k | while (ents[look].val) |
255 | 3.72k | { |
256 | 3.72k | code = hash(ents[look].key, table->keylen) % size; |
257 | 3.72k | if ((code <= hole && hole < look) || |
258 | 3.72k | (look < code && code <= hole) || |
259 | 3.72k | (hole < look && look < code)) |
260 | 1.63k | { |
261 | 1.63k | ents[hole] = ents[look]; |
262 | 1.63k | ents[look].val = NULL; |
263 | 1.63k | hole = look; |
264 | 1.63k | } |
265 | | |
266 | 3.72k | look++; |
267 | 3.72k | if (look == size) |
268 | 0 | look = 0; |
269 | 3.72k | } |
270 | | |
271 | 59.9k | table->load --; |
272 | 59.9k | } |
273 | | |
274 | | void |
275 | | fz_hash_remove(fz_context *ctx, fz_hash_table *table, const void *key) |
276 | 59.9k | { |
277 | 59.9k | fz_hash_entry *ents = table->ents; |
278 | 59.9k | unsigned size = table->size; |
279 | 59.9k | unsigned pos = hash(key, table->keylen) % size; |
280 | | |
281 | 59.9k | if (table->lock >= 0) |
282 | 59.9k | fz_assert_lock_held(ctx, table->lock); |
283 | | |
284 | 60.0k | while (1) |
285 | 60.0k | { |
286 | 60.0k | if (!ents[pos].val) |
287 | 0 | { |
288 | 0 | fz_warn(ctx, "assert: remove non-existent hash entry"); |
289 | 0 | return; |
290 | 0 | } |
291 | | |
292 | 60.0k | if (memcmp(key, ents[pos].key, table->keylen) == 0) |
293 | 59.9k | { |
294 | 59.9k | do_removal(ctx, table, pos); |
295 | 59.9k | return; |
296 | 59.9k | } |
297 | | |
298 | 143 | pos++; |
299 | 143 | if (pos == size) |
300 | 0 | pos = 0; |
301 | 143 | } |
302 | 59.9k | } |
303 | | |
304 | | void |
305 | | fz_hash_for_each(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_for_each_fn *callback) |
306 | 0 | { |
307 | 0 | int i; |
308 | 0 | for (i = 0; i < table->size; ++i) |
309 | 0 | if (table->ents[i].val) |
310 | 0 | callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val); |
311 | 0 | } |
312 | | |
313 | | void |
314 | | fz_hash_filter(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_filter_fn *callback) |
315 | 6 | { |
316 | 6 | int i; |
317 | 12 | restart: |
318 | 43.9k | for (i = 0; i < table->size; ++i) |
319 | 43.8k | { |
320 | 43.8k | if (table->ents[i].val) |
321 | 6 | { |
322 | 6 | if (callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val)) |
323 | 6 | { |
324 | 6 | do_removal(ctx, table, i); |
325 | 6 | goto restart; /* we may have moved some slots around, so just restart the scan */ |
326 | 6 | } |
327 | 6 | } |
328 | 43.8k | } |
329 | 12 | } |