/src/libzip/lib/zip_hash.c
Line | Count | Source |
1 | | /* |
2 | | zip_hash.c -- hash table string -> uint64 |
3 | | Copyright (C) 2015-2024 Dieter Baron and Thomas Klausner |
4 | | |
5 | | This file is part of libzip, a library to manipulate ZIP archives. |
6 | | The authors can be contacted at <info@libzip.org> |
7 | | |
8 | | Redistribution and use in source and binary forms, with or without |
9 | | modification, are permitted provided that the following conditions |
10 | | are met: |
11 | | 1. Redistributions of source code must retain the above copyright |
12 | | notice, this list of conditions and the following disclaimer. |
13 | | 2. Redistributions in binary form must reproduce the above copyright |
14 | | notice, this list of conditions and the following disclaimer in |
15 | | the documentation and/or other materials provided with the |
16 | | distribution. |
17 | | 3. The names of the authors may not be used to endorse or promote |
18 | | products derived from this software without specific prior |
19 | | written permission. |
20 | | |
21 | | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS |
22 | | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
23 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
24 | | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
25 | | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
26 | | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
27 | | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
29 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
30 | | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN |
31 | | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #include "zipint.h" |
35 | | #include <stdlib.h> |
36 | | #include <string.h> |
37 | | |
38 | | /* parameter for the string hash function */ |
39 | 175k | #define HASH_MULTIPLIER 33 |
40 | 24.1k | #define HASH_START 5381 |
41 | | |
42 | | /* hash table's fill ratio is kept between these by doubling/halfing its size as necessary */ |
43 | 14.2k | #define HASH_MAX_FILL .75 |
44 | 0 | #define HASH_MIN_FILL .01 |
45 | | |
46 | | /* but hash table size is kept between these */ |
47 | 0 | #define HASH_MIN_SIZE 256 |
48 | 3.09k | #define HASH_MAX_SIZE 0x80000000ul |
49 | | |
50 | | struct zip_hash_entry { |
51 | | const zip_uint8_t *name; |
52 | | zip_int64_t orig_index; |
53 | | zip_int64_t current_index; |
54 | | struct zip_hash_entry *next; |
55 | | zip_uint32_t hash_value; |
56 | | }; |
57 | | typedef struct zip_hash_entry zip_hash_entry_t; |
58 | | |
59 | | struct zip_hash { |
60 | | zip_uint32_t table_size; |
61 | | zip_uint64_t nentries; |
62 | | zip_hash_entry_t **table; |
63 | | }; |
64 | | |
65 | | |
66 | | /* free list of entries */ |
67 | 5.27k | static void free_list(zip_hash_entry_t *entry) { |
68 | 11.4k | while (entry != NULL) { |
69 | 6.19k | zip_hash_entry_t *next = entry->next; |
70 | 6.19k | free(entry); |
71 | 6.19k | entry = next; |
72 | 6.19k | } |
73 | 5.27k | } |
74 | | |
75 | | |
76 | | /* compute hash of string, full 32 bit value */ |
77 | 24.1k | static zip_uint32_t hash_string(const zip_uint8_t *name) { |
78 | 24.1k | zip_uint64_t value = HASH_START; |
79 | | |
80 | 24.1k | if (name == NULL) { |
81 | 0 | return 0; |
82 | 0 | } |
83 | | |
84 | 200k | while (*name != 0) { |
85 | 175k | value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul); |
86 | 175k | name++; |
87 | 175k | } |
88 | | |
89 | 24.1k | return (zip_uint32_t)value; |
90 | 24.1k | } |
91 | | |
92 | | |
93 | | /* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */ |
94 | 3.09k | static bool hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) { |
95 | 3.09k | zip_hash_entry_t **new_table; |
96 | | |
97 | 3.09k | if (new_size == hash->table_size) { |
98 | 0 | return true; |
99 | 0 | } |
100 | | |
101 | 3.09k | if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) { |
102 | 0 | zip_error_set(error, ZIP_ER_MEMORY, 0); |
103 | 0 | return false; |
104 | 0 | } |
105 | | |
106 | 3.09k | if (hash->nentries > 0) { |
107 | 1.26k | zip_uint32_t i; |
108 | | |
109 | 2.58k | for (i = 0; i < hash->table_size; i++) { |
110 | 1.31k | zip_hash_entry_t *entry = hash->table[i]; |
111 | 2.62k | while (entry) { |
112 | 1.31k | zip_hash_entry_t *next = entry->next; |
113 | | |
114 | 1.31k | zip_uint32_t new_index = entry->hash_value % new_size; |
115 | | |
116 | 1.31k | entry->next = new_table[new_index]; |
117 | 1.31k | new_table[new_index] = entry; |
118 | | |
119 | 1.31k | entry = next; |
120 | 1.31k | } |
121 | 1.31k | } |
122 | 1.26k | } |
123 | | |
124 | 3.09k | free(hash->table); |
125 | 3.09k | hash->table = new_table; |
126 | 3.09k | hash->table_size = new_size; |
127 | | |
128 | 3.09k | return true; |
129 | 3.09k | } |
130 | | |
131 | | |
132 | 1.83k | static zip_uint32_t size_for_capacity(zip_uint64_t capacity) { |
133 | 1.83k | double needed_size = capacity / HASH_MAX_FILL; |
134 | 1.83k | zip_uint32_t v; |
135 | | |
136 | 1.83k | if (needed_size > ZIP_UINT32_MAX) { |
137 | 0 | v = ZIP_UINT32_MAX; |
138 | 0 | } |
139 | 1.83k | else { |
140 | 1.83k | v = (zip_uint32_t)needed_size; |
141 | 1.83k | } |
142 | | |
143 | 1.83k | if (v > HASH_MAX_SIZE) { |
144 | 0 | return HASH_MAX_SIZE; |
145 | 0 | } |
146 | | |
147 | | /* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu> |
148 | | (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */ |
149 | | |
150 | 1.83k | v--; |
151 | 1.83k | v |= v >> 1; |
152 | 1.83k | v |= v >> 2; |
153 | 1.83k | v |= v >> 4; |
154 | 1.83k | v |= v >> 8; |
155 | 1.83k | v |= v >> 16; |
156 | 1.83k | v++; |
157 | | |
158 | 1.83k | return v; |
159 | 1.83k | } |
160 | | |
161 | | |
162 | 4.77k | zip_hash_t *_zip_hash_new(zip_error_t *error) { |
163 | 4.77k | zip_hash_t *hash; |
164 | | |
165 | 4.77k | if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) { |
166 | 0 | zip_error_set(error, ZIP_ER_MEMORY, 0); |
167 | 0 | return NULL; |
168 | 0 | } |
169 | | |
170 | 4.77k | hash->table_size = 0; |
171 | 4.77k | hash->nentries = 0; |
172 | 4.77k | hash->table = NULL; |
173 | | |
174 | 4.77k | return hash; |
175 | 4.77k | } |
176 | | |
177 | | |
178 | 4.77k | void _zip_hash_free(zip_hash_t *hash) { |
179 | 4.77k | zip_uint32_t i; |
180 | | |
181 | 4.77k | if (hash == NULL) { |
182 | 0 | return; |
183 | 0 | } |
184 | | |
185 | 4.77k | if (hash->table != NULL) { |
186 | 50.4k | for (i = 0; i < hash->table_size; i++) { |
187 | 48.6k | if (hash->table[i] != NULL) { |
188 | 5.27k | free_list(hash->table[i]); |
189 | 5.27k | } |
190 | 48.6k | } |
191 | 1.83k | free(hash->table); |
192 | 1.83k | } |
193 | 4.77k | free(hash); |
194 | 4.77k | } |
195 | | |
196 | | |
197 | | /* insert into hash, return error on existence or memory issues */ |
198 | 24.1k | bool _zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) { |
199 | 24.1k | zip_uint32_t hash_value, table_index; |
200 | 24.1k | zip_hash_entry_t *entry; |
201 | | |
202 | 24.1k | if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) { |
203 | 0 | zip_error_set(error, ZIP_ER_INVAL, 0); |
204 | 0 | return false; |
205 | 0 | } |
206 | | |
207 | 24.1k | if (hash->table_size == 0) { |
208 | 0 | if (!hash_resize(hash, HASH_MIN_SIZE, error)) { |
209 | 0 | return false; |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | 24.1k | hash_value = hash_string(name); |
214 | 24.1k | table_index = hash_value % hash->table_size; |
215 | | |
216 | 25.5k | for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) { |
217 | 19.3k | if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) { |
218 | 17.9k | if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) { |
219 | 17.9k | zip_error_set(error, ZIP_ER_EXISTS, 0); |
220 | 17.9k | return false; |
221 | 17.9k | } |
222 | 0 | else { |
223 | 0 | break; |
224 | 0 | } |
225 | 17.9k | } |
226 | 19.3k | } |
227 | | |
228 | 6.19k | if (entry == NULL) { |
229 | 6.19k | if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) { |
230 | 0 | zip_error_set(error, ZIP_ER_MEMORY, 0); |
231 | 0 | return false; |
232 | 0 | } |
233 | 6.19k | entry->name = name; |
234 | 6.19k | entry->next = hash->table[table_index]; |
235 | 6.19k | hash->table[table_index] = entry; |
236 | 6.19k | entry->hash_value = hash_value; |
237 | 6.19k | entry->orig_index = -1; |
238 | 6.19k | hash->nentries++; |
239 | 6.19k | if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) { |
240 | 1.26k | if (!hash_resize(hash, hash->table_size * 2, error)) { |
241 | 0 | return false; |
242 | 0 | } |
243 | 1.26k | } |
244 | 6.19k | } |
245 | | |
246 | 6.19k | if (flags & ZIP_FL_UNCHANGED) { |
247 | 6.19k | entry->orig_index = (zip_int64_t)index; |
248 | 6.19k | } |
249 | 6.19k | entry->current_index = (zip_int64_t)index; |
250 | | |
251 | 6.19k | return true; |
252 | 6.19k | } |
253 | | |
254 | | |
255 | | /* remove entry from hash, error if not found */ |
256 | 0 | bool _zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) { |
257 | 0 | zip_uint32_t hash_value, index; |
258 | 0 | zip_hash_entry_t *entry, *previous; |
259 | |
|
260 | 0 | if (hash == NULL || name == NULL) { |
261 | 0 | zip_error_set(error, ZIP_ER_INVAL, 0); |
262 | 0 | return false; |
263 | 0 | } |
264 | | |
265 | 0 | if (hash->nentries > 0) { |
266 | 0 | hash_value = hash_string(name); |
267 | 0 | index = hash_value % hash->table_size; |
268 | 0 | previous = NULL; |
269 | 0 | entry = hash->table[index]; |
270 | 0 | while (entry) { |
271 | 0 | if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) { |
272 | 0 | if (entry->orig_index == -1) { |
273 | 0 | if (previous) { |
274 | 0 | previous->next = entry->next; |
275 | 0 | } |
276 | 0 | else { |
277 | 0 | hash->table[index] = entry->next; |
278 | 0 | } |
279 | 0 | free(entry); |
280 | 0 | hash->nentries--; |
281 | 0 | if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) { |
282 | 0 | if (!hash_resize(hash, hash->table_size / 2, error)) { |
283 | 0 | return false; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | 0 | else { |
288 | 0 | entry->current_index = -1; |
289 | 0 | } |
290 | 0 | return true; |
291 | 0 | } |
292 | 0 | previous = entry; |
293 | 0 | entry = entry->next; |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 0 | zip_error_set(error, ZIP_ER_NOENT, 0); |
298 | 0 | return false; |
299 | 0 | } |
300 | | |
301 | | |
302 | | /* find value for entry in hash, -1 if not found */ |
303 | 0 | zip_int64_t _zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) { |
304 | 0 | zip_uint32_t hash_value, index; |
305 | 0 | zip_hash_entry_t *entry; |
306 | |
|
307 | 0 | if (hash == NULL || name == NULL) { |
308 | 0 | zip_error_set(error, ZIP_ER_INVAL, 0); |
309 | 0 | return -1; |
310 | 0 | } |
311 | | |
312 | 0 | if (hash->nentries > 0) { |
313 | 0 | hash_value = hash_string(name); |
314 | 0 | index = hash_value % hash->table_size; |
315 | 0 | for (entry = hash->table[index]; entry != NULL; entry = entry->next) { |
316 | 0 | if (strcmp((const char *)name, (const char *)entry->name) == 0) { |
317 | 0 | if (flags & ZIP_FL_UNCHANGED) { |
318 | 0 | if (entry->orig_index != -1) { |
319 | 0 | return entry->orig_index; |
320 | 0 | } |
321 | 0 | } |
322 | 0 | else { |
323 | 0 | if (entry->current_index != -1) { |
324 | 0 | return entry->current_index; |
325 | 0 | } |
326 | 0 | } |
327 | 0 | break; |
328 | 0 | } |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | 0 | zip_error_set(error, ZIP_ER_NOENT, 0); |
333 | 0 | return -1; |
334 | 0 | } |
335 | | |
336 | | |
337 | 2.09k | bool _zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) { |
338 | 2.09k | zip_uint32_t new_size; |
339 | | |
340 | 2.09k | if (capacity == 0) { |
341 | 265 | return true; |
342 | 265 | } |
343 | | |
344 | 1.83k | new_size = size_for_capacity(capacity); |
345 | | |
346 | 1.83k | if (new_size <= hash->table_size) { |
347 | 0 | return true; |
348 | 0 | } |
349 | | |
350 | 1.83k | if (!hash_resize(hash, new_size, error)) { |
351 | 0 | return false; |
352 | 0 | } |
353 | | |
354 | 1.83k | return true; |
355 | 1.83k | } |
356 | | |
357 | | |
358 | 0 | bool _zip_hash_revert(zip_hash_t *hash, zip_error_t *error) { |
359 | 0 | zip_uint32_t i; |
360 | 0 | zip_hash_entry_t *entry, *previous; |
361 | |
|
362 | 0 | for (i = 0; i < hash->table_size; i++) { |
363 | 0 | previous = NULL; |
364 | 0 | entry = hash->table[i]; |
365 | 0 | while (entry) { |
366 | 0 | if (entry->orig_index == -1) { |
367 | 0 | zip_hash_entry_t *p; |
368 | 0 | if (previous) { |
369 | 0 | previous->next = entry->next; |
370 | 0 | } |
371 | 0 | else { |
372 | 0 | hash->table[i] = entry->next; |
373 | 0 | } |
374 | 0 | p = entry; |
375 | 0 | entry = entry->next; |
376 | | /* previous does not change */ |
377 | 0 | free(p); |
378 | 0 | hash->nentries--; |
379 | 0 | } |
380 | 0 | else { |
381 | 0 | entry->current_index = entry->orig_index; |
382 | 0 | previous = entry; |
383 | 0 | entry = entry->next; |
384 | 0 | } |
385 | 0 | } |
386 | 0 | } |
387 | |
|
388 | 0 | if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) { |
389 | 0 | zip_uint32_t new_size = hash->table_size / 2; |
390 | 0 | while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) { |
391 | 0 | new_size /= 2; |
392 | 0 | } |
393 | 0 | if (!hash_resize(hash, new_size, error)) { |
394 | 0 | return false; |
395 | 0 | } |
396 | 0 | } |
397 | | |
398 | 0 | return true; |
399 | 0 | } |