Coverage Report

Created: 2026-02-14 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libzip/lib/zip_hash.c
Line
Count
Source
1
/*
2
  zip_hash.c -- hash table string -> uint64
3
  Copyright (C) 2015-2024 Dieter Baron and Thomas Klausner
4
5
  This file is part of libzip, a library to manipulate ZIP archives.
6
  The authors can be contacted at <info@libzip.org>
7
8
  Redistribution and use in source and binary forms, with or without
9
  modification, are permitted provided that the following conditions
10
  are met:
11
  1. Redistributions of source code must retain the above copyright
12
     notice, this list of conditions and the following disclaimer.
13
  2. Redistributions in binary form must reproduce the above copyright
14
     notice, this list of conditions and the following disclaimer in
15
     the documentation and/or other materials provided with the
16
     distribution.
17
  3. The names of the authors may not be used to endorse or promote
18
     products derived from this software without specific prior
19
     written permission.
20
21
  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29
  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31
  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
*/
33
34
#include "zipint.h"
35
#include <stdlib.h>
36
#include <string.h>
37
38
/* parameters for the string hash function: hash_string() starts from HASH_START and multiplies by HASH_MULTIPLIER before adding each byte */
39
406k
#define HASH_MULTIPLIER 33
40
49.2k
#define HASH_START 5381
41
42
/* hash table's fill ratio is kept between these by doubling/halving its size as necessary */
43
31.0k
#define HASH_MAX_FILL .75
44
0
#define HASH_MIN_FILL .01
45
46
/* but hash table size is kept between these (both are powers of 2) */
47
1.03k
#define HASH_MIN_SIZE 256
48
4.90k
#define HASH_MAX_SIZE 0x80000000ul
49
50
/* one name in the hash table; entries whose hash maps to the same bucket are chained through next */
struct zip_hash_entry {
51
    const zip_uint8_t *name; /* key string; the pointer is stored as passed to _zip_hash_add(), not copied */
52
    zip_int64_t orig_index; /* index recorded when added with ZIP_FL_UNCHANGED, -1 if there is none */
53
    zip_int64_t current_index; /* index currently associated with name, -1 once the name has been deleted */
54
    struct zip_hash_entry *next; /* next entry in the same bucket, NULL at the end of the chain */
55
    zip_uint32_t hash_value; /* full 32 bit result of hash_string(name), reused when the table is resized */
56
};
57
typedef struct zip_hash_entry zip_hash_entry_t;
58
59
/* the hash table itself: an array of bucket chains plus bookkeeping */
struct zip_hash {
60
    zip_uint32_t table_size; /* number of buckets in table; 0 until the first insert or reserve */
61
    zip_uint64_t nentries; /* number of entries currently linked into the table */
62
    zip_hash_entry_t **table; /* bucket array, indexed by hash_value % table_size; NULL while table_size is 0 */
63
};
64
65
66
/* free list of entries */
67
static void
68
11.9k
free_list(zip_hash_entry_t *entry) {
69
26.0k
    while (entry != NULL) {
70
14.0k
        zip_hash_entry_t *next = entry->next;
71
14.0k
        free(entry);
72
14.0k
        entry = next;
73
14.0k
    }
74
11.9k
}
75
76
77
/* compute hash of string, full 32 bit value */
78
static zip_uint32_t
79
49.2k
hash_string(const zip_uint8_t *name) {
80
49.2k
    zip_uint64_t value = HASH_START;
81
82
49.2k
    if (name == NULL) {
83
0
        return 0;
84
0
    }
85
86
455k
    while (*name != 0) {
87
406k
        value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul);
88
406k
        name++;
89
406k
    }
90
91
49.2k
    return (zip_uint32_t)value;
92
49.2k
}
93
94
95
/* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */
96
static bool
97
5.94k
hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) {
98
5.94k
    zip_hash_entry_t **new_table;
99
100
5.94k
    if (new_size == hash->table_size) {
101
0
        return true;
102
0
    }
103
104
5.94k
    if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) {
105
0
        zip_error_set(error, ZIP_ER_MEMORY, 0);
106
0
        return false;
107
0
    }
108
109
5.94k
    if (hash->nentries > 0) {
110
1.97k
        zip_uint32_t i;
111
112
4.03k
        for (i = 0; i < hash->table_size; i++) {
113
2.05k
            zip_hash_entry_t *entry = hash->table[i];
114
4.11k
            while (entry) {
115
2.05k
                zip_hash_entry_t *next = entry->next;
116
117
2.05k
                zip_uint32_t new_index = entry->hash_value % new_size;
118
119
2.05k
                entry->next = new_table[new_index];
120
2.05k
                new_table[new_index] = entry;
121
122
2.05k
                entry = next;
123
2.05k
            }
124
2.05k
        }
125
1.97k
    }
126
127
5.94k
    free(hash->table);
128
5.94k
    hash->table = new_table;
129
5.94k
    hash->table_size = new_size;
130
131
5.94k
    return true;
132
5.94k
}
133
134
135
static zip_uint32_t
136
2.93k
size_for_capacity(zip_uint64_t capacity) {
137
2.93k
    double needed_size = capacity / HASH_MAX_FILL;
138
2.93k
    zip_uint32_t v;
139
140
2.93k
    if (needed_size > ZIP_UINT32_MAX) {
141
0
        v = ZIP_UINT32_MAX;
142
0
    }
143
2.93k
    else {
144
2.93k
        v = (zip_uint32_t)needed_size;
145
2.93k
    }
146
147
2.93k
    if (v > HASH_MAX_SIZE) {
148
0
        return HASH_MAX_SIZE;
149
0
    }
150
151
    /* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu>
152
     (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */
153
154
2.93k
    v--;
155
2.93k
    v |= v >> 1;
156
2.93k
    v |= v >> 2;
157
2.93k
    v |= v >> 4;
158
2.93k
    v |= v >> 8;
159
2.93k
    v |= v >> 16;
160
2.93k
    v++;
161
162
2.93k
    return v;
163
2.93k
}
164
165
166
zip_hash_t *
167
9.45k
_zip_hash_new(zip_error_t *error) {
168
9.45k
    zip_hash_t *hash;
169
170
9.45k
    if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) {
171
0
        zip_error_set(error, ZIP_ER_MEMORY, 0);
172
0
        return NULL;
173
0
    }
174
175
9.45k
    hash->table_size = 0;
176
9.45k
    hash->nentries = 0;
177
9.45k
    hash->table = NULL;
178
179
9.45k
    return hash;
180
9.45k
}
181
182
183
void
184
9.45k
_zip_hash_free(zip_hash_t *hash) {
185
9.45k
    zip_uint32_t i;
186
187
9.45k
    if (hash == NULL) {
188
0
        return;
189
0
    }
190
191
9.45k
    if (hash->table != NULL) {
192
365k
        for (i = 0; i < hash->table_size; i++) {
193
361k
            if (hash->table[i] != NULL) {
194
11.9k
                free_list(hash->table[i]);
195
11.9k
            }
196
361k
        }
197
3.96k
        free(hash->table);
198
3.96k
    }
199
9.45k
    free(hash);
200
9.45k
}
201
202
203
/* insert into hash, return error on existence or memory issues */
204
bool
205
49.2k
_zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) {
206
49.2k
    zip_uint32_t hash_value, table_index;
207
49.2k
    zip_hash_entry_t *entry;
208
209
49.2k
    if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) {
210
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
211
0
        return false;
212
0
    }
213
214
49.2k
    if (hash->table_size == 0) {
215
1.03k
        if (!hash_resize(hash, HASH_MIN_SIZE, error)) {
216
0
            return false;
217
0
        }
218
1.03k
    }
219
220
49.2k
    hash_value = hash_string(name);
221
49.2k
    table_index = hash_value % hash->table_size;
222
223
52.5k
    for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) {
224
38.4k
        if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
225
35.2k
            if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) {
226
35.2k
                zip_error_set(error, ZIP_ER_EXISTS, 0);
227
35.2k
                return false;
228
35.2k
            }
229
0
            else {
230
0
                break;
231
0
            }
232
35.2k
        }
233
38.4k
    }
234
235
14.0k
    if (entry == NULL) {
236
14.0k
        if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) {
237
0
            zip_error_set(error, ZIP_ER_MEMORY, 0);
238
0
            return false;
239
0
        }
240
14.0k
        entry->name = name;
241
14.0k
        entry->next = hash->table[table_index];
242
14.0k
        hash->table[table_index] = entry;
243
14.0k
        entry->hash_value = hash_value;
244
14.0k
        entry->orig_index = -1;
245
14.0k
        hash->nentries++;
246
14.0k
        if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) {
247
1.97k
            if (!hash_resize(hash, hash->table_size * 2, error)) {
248
0
                return false;
249
0
            }
250
1.97k
        }
251
14.0k
    }
252
253
14.0k
    if (flags & ZIP_FL_UNCHANGED) {
254
13.0k
        entry->orig_index = (zip_int64_t)index;
255
13.0k
    }
256
14.0k
    entry->current_index = (zip_int64_t)index;
257
258
14.0k
    return true;
259
14.0k
}
260
261
262
/* remove entry from hash, error if not found */
263
bool
264
0
_zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) {
265
0
    zip_uint32_t hash_value, index;
266
0
    zip_hash_entry_t *entry, *previous;
267
268
0
    if (hash == NULL || name == NULL) {
269
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
270
0
        return false;
271
0
    }
272
273
0
    if (hash->nentries > 0) {
274
0
        hash_value = hash_string(name);
275
0
        index = hash_value % hash->table_size;
276
0
        previous = NULL;
277
0
        entry = hash->table[index];
278
0
        while (entry) {
279
0
            if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
280
0
                if (entry->orig_index == -1) {
281
0
                    if (previous) {
282
0
                        previous->next = entry->next;
283
0
                    }
284
0
                    else {
285
0
                        hash->table[index] = entry->next;
286
0
                    }
287
0
                    free(entry);
288
0
                    hash->nentries--;
289
0
                    if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
290
0
                        if (!hash_resize(hash, hash->table_size / 2, error)) {
291
0
                            return false;
292
0
                        }
293
0
                    }
294
0
                }
295
0
                else {
296
0
                    entry->current_index = -1;
297
0
                }
298
0
                return true;
299
0
            }
300
0
            previous = entry;
301
0
            entry = entry->next;
302
0
        }
303
0
    }
304
305
0
    zip_error_set(error, ZIP_ER_NOENT, 0);
306
0
    return false;
307
0
}
308
309
310
/* find value for entry in hash, -1 if not found */
311
zip_int64_t
312
2.06k
_zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) {
313
2.06k
    zip_uint32_t hash_value, index;
314
2.06k
    zip_hash_entry_t *entry;
315
316
2.06k
    if (hash == NULL || name == NULL) {
317
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
318
0
        return -1;
319
0
    }
320
321
2.06k
    if (hash->nentries > 0) {
322
0
        hash_value = hash_string(name);
323
0
        index = hash_value % hash->table_size;
324
0
        for (entry = hash->table[index]; entry != NULL; entry = entry->next) {
325
0
            if (strcmp((const char *)name, (const char *)entry->name) == 0) {
326
0
                if (flags & ZIP_FL_UNCHANGED) {
327
0
                    if (entry->orig_index != -1) {
328
0
                        return entry->orig_index;
329
0
                    }
330
0
                }
331
0
                else {
332
0
                    if (entry->current_index != -1) {
333
0
                        return entry->current_index;
334
0
                    }
335
0
                }
336
0
                break;
337
0
            }
338
0
        }
339
0
    }
340
341
2.06k
    zip_error_set(error, ZIP_ER_NOENT, 0);
342
2.06k
    return -1;
343
2.06k
}
344
345
346
bool
347
3.40k
_zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) {
348
3.40k
    zip_uint32_t new_size;
349
350
3.40k
    if (capacity == 0) {
351
479
        return true;
352
479
    }
353
354
2.93k
    new_size = size_for_capacity(capacity);
355
356
2.93k
    if (new_size <= hash->table_size) {
357
0
        return true;
358
0
    }
359
360
2.93k
    if (!hash_resize(hash, new_size, error)) {
361
0
        return false;
362
0
    }
363
364
2.93k
    return true;
365
2.93k
}
366
367
368
bool
369
0
_zip_hash_revert(zip_hash_t *hash, zip_error_t *error) {
370
0
    zip_uint32_t i;
371
0
    zip_hash_entry_t *entry, *previous;
372
373
0
    for (i = 0; i < hash->table_size; i++) {
374
0
        previous = NULL;
375
0
        entry = hash->table[i];
376
0
        while (entry) {
377
0
            if (entry->orig_index == -1) {
378
0
                zip_hash_entry_t *p;
379
0
                if (previous) {
380
0
                    previous->next = entry->next;
381
0
                }
382
0
                else {
383
0
                    hash->table[i] = entry->next;
384
0
                }
385
0
                p = entry;
386
0
                entry = entry->next;
387
                /* previous does not change */
388
0
                free(p);
389
0
                hash->nentries--;
390
0
            }
391
0
            else {
392
0
                entry->current_index = entry->orig_index;
393
0
                previous = entry;
394
0
                entry = entry->next;
395
0
            }
396
0
        }
397
0
    }
398
399
0
    if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
400
0
        zip_uint32_t new_size = hash->table_size / 2;
401
0
        while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) {
402
0
            new_size /= 2;
403
0
        }
404
0
        if (!hash_resize(hash, new_size, error)) {
405
0
            return false;
406
0
        }
407
0
    }
408
409
0
    return true;
410
0
}