Coverage Report

Created: 2026-03-14 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libzip/lib/zip_hash.c
Line
Count
Source
1
/*
2
  zip_hash.c -- hash table string -> uint64
3
  Copyright (C) 2015-2024 Dieter Baron and Thomas Klausner
4
5
  This file is part of libzip, a library to manipulate ZIP archives.
6
  The authors can be contacted at <info@libzip.org>
7
8
  Redistribution and use in source and binary forms, with or without
9
  modification, are permitted provided that the following conditions
10
  are met:
11
  1. Redistributions of source code must retain the above copyright
12
     notice, this list of conditions and the following disclaimer.
13
  2. Redistributions in binary form must reproduce the above copyright
14
     notice, this list of conditions and the following disclaimer in
15
     the documentation and/or other materials provided with the
16
     distribution.
17
  3. The names of the authors may not be used to endorse or promote
18
     products derived from this software without specific prior
19
     written permission.
20
21
  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29
  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31
  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
*/
33
34
#include "zipint.h"
35
#include <stdlib.h>
36
#include <string.h>
37
38
/* parameter for the string hash function */
39
446k
#define HASH_MULTIPLIER 33
40
52.8k
#define HASH_START 5381
41
42
/* hash table's fill ratio is kept between these by doubling/halfing its size as necessary */
43
34.2k
#define HASH_MAX_FILL .75
44
0
#define HASH_MIN_FILL .01
45
46
/* but hash table size is kept between these */
47
1.03k
#define HASH_MIN_SIZE 256
48
5.52k
#define HASH_MAX_SIZE 0x80000000ul
49
50
struct zip_hash_entry {
51
    const zip_uint8_t *name;
52
    zip_int64_t orig_index;
53
    zip_int64_t current_index;
54
    struct zip_hash_entry *next;
55
    zip_uint32_t hash_value;
56
};
57
typedef struct zip_hash_entry zip_hash_entry_t;
58
59
struct zip_hash {
60
    zip_uint32_t table_size;
61
    zip_uint64_t nentries;
62
    zip_hash_entry_t **table;
63
};
64
65
66
/* free list of entries */
67
13.1k
static void free_list(zip_hash_entry_t *entry) {
68
28.6k
    while (entry != NULL) {
69
15.4k
        zip_hash_entry_t *next = entry->next;
70
15.4k
        free(entry);
71
15.4k
        entry = next;
72
15.4k
    }
73
13.1k
}
74
75
76
/* compute hash of string, full 32 bit value */
77
52.8k
static zip_uint32_t hash_string(const zip_uint8_t *name) {
78
52.8k
    zip_uint64_t value = HASH_START;
79
80
52.8k
    if (name == NULL) {
81
0
        return 0;
82
0
    }
83
84
499k
    while (*name != 0) {
85
446k
        value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul);
86
446k
        name++;
87
446k
    }
88
89
52.8k
    return (zip_uint32_t)value;
90
52.8k
}
91
92
93
/* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */
94
6.55k
static bool hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) {
95
6.55k
    zip_hash_entry_t **new_table;
96
97
6.55k
    if (new_size == hash->table_size) {
98
0
        return true;
99
0
    }
100
101
6.55k
    if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) {
102
0
        zip_error_set(error, ZIP_ER_MEMORY, 0);
103
0
        return false;
104
0
    }
105
106
6.55k
    if (hash->nentries > 0) {
107
2.26k
        zip_uint32_t i;
108
109
4.61k
        for (i = 0; i < hash->table_size; i++) {
110
2.35k
            zip_hash_entry_t *entry = hash->table[i];
111
4.70k
            while (entry) {
112
2.35k
                zip_hash_entry_t *next = entry->next;
113
114
2.35k
                zip_uint32_t new_index = entry->hash_value % new_size;
115
116
2.35k
                entry->next = new_table[new_index];
117
2.35k
                new_table[new_index] = entry;
118
119
2.35k
                entry = next;
120
2.35k
            }
121
2.35k
        }
122
2.26k
    }
123
124
6.55k
    free(hash->table);
125
6.55k
    hash->table = new_table;
126
6.55k
    hash->table_size = new_size;
127
128
6.55k
    return true;
129
6.55k
}
130
131
132
3.26k
static zip_uint32_t size_for_capacity(zip_uint64_t capacity) {
133
3.26k
    double needed_size = capacity / HASH_MAX_FILL;
134
3.26k
    zip_uint32_t v;
135
136
3.26k
    if (needed_size > ZIP_UINT32_MAX) {
137
0
        v = ZIP_UINT32_MAX;
138
0
    }
139
3.26k
    else {
140
3.26k
        v = (zip_uint32_t)needed_size;
141
3.26k
    }
142
143
3.26k
    if (v > HASH_MAX_SIZE) {
144
0
        return HASH_MAX_SIZE;
145
0
    }
146
147
    /* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu>
148
     (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */
149
150
3.26k
    v--;
151
3.26k
    v |= v >> 1;
152
3.26k
    v |= v >> 2;
153
3.26k
    v |= v >> 4;
154
3.26k
    v |= v >> 8;
155
3.26k
    v |= v >> 16;
156
3.26k
    v++;
157
158
3.26k
    return v;
159
3.26k
}
160
161
162
10.0k
zip_hash_t *_zip_hash_new(zip_error_t *error) {
163
10.0k
    zip_hash_t *hash;
164
165
10.0k
    if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) {
166
0
        zip_error_set(error, ZIP_ER_MEMORY, 0);
167
0
        return NULL;
168
0
    }
169
170
10.0k
    hash->table_size = 0;
171
10.0k
    hash->nentries = 0;
172
10.0k
    hash->table = NULL;
173
174
10.0k
    return hash;
175
10.0k
}
176
177
178
10.0k
void _zip_hash_free(zip_hash_t *hash) {
179
10.0k
    zip_uint32_t i;
180
181
10.0k
    if (hash == NULL) {
182
0
        return;
183
0
    }
184
185
10.0k
    if (hash->table != NULL) {
186
372k
        for (i = 0; i < hash->table_size; i++) {
187
367k
            if (hash->table[i] != NULL) {
188
13.1k
                free_list(hash->table[i]);
189
13.1k
            }
190
367k
        }
191
4.29k
        free(hash->table);
192
4.29k
    }
193
10.0k
    free(hash);
194
10.0k
}
195
196
197
/* insert into hash, return error on existence or memory issues */
198
52.8k
bool _zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) {
199
52.8k
    zip_uint32_t hash_value, table_index;
200
52.8k
    zip_hash_entry_t *entry;
201
202
52.8k
    if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) {
203
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
204
0
        return false;
205
0
    }
206
207
52.8k
    if (hash->table_size == 0) {
208
1.03k
        if (!hash_resize(hash, HASH_MIN_SIZE, error)) {
209
0
            return false;
210
0
        }
211
1.03k
    }
212
213
52.8k
    hash_value = hash_string(name);
214
52.8k
    table_index = hash_value % hash->table_size;
215
216
56.2k
    for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) {
217
40.7k
        if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
218
37.3k
            if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) {
219
37.3k
                zip_error_set(error, ZIP_ER_EXISTS, 0);
220
37.3k
                return false;
221
37.3k
            }
222
0
            else {
223
0
                break;
224
0
            }
225
37.3k
        }
226
40.7k
    }
227
228
15.4k
    if (entry == NULL) {
229
15.4k
        if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) {
230
0
            zip_error_set(error, ZIP_ER_MEMORY, 0);
231
0
            return false;
232
0
        }
233
15.4k
        entry->name = name;
234
15.4k
        entry->next = hash->table[table_index];
235
15.4k
        hash->table[table_index] = entry;
236
15.4k
        entry->hash_value = hash_value;
237
15.4k
        entry->orig_index = -1;
238
15.4k
        hash->nentries++;
239
15.4k
        if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) {
240
2.26k
            if (!hash_resize(hash, hash->table_size * 2, error)) {
241
0
                return false;
242
0
            }
243
2.26k
        }
244
15.4k
    }
245
246
15.4k
    if (flags & ZIP_FL_UNCHANGED) {
247
14.4k
        entry->orig_index = (zip_int64_t)index;
248
14.4k
    }
249
15.4k
    entry->current_index = (zip_int64_t)index;
250
251
15.4k
    return true;
252
15.4k
}
253
254
255
/* remove entry from hash, error if not found */
256
0
bool _zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) {
257
0
    zip_uint32_t hash_value, index;
258
0
    zip_hash_entry_t *entry, *previous;
259
260
0
    if (hash == NULL || name == NULL) {
261
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
262
0
        return false;
263
0
    }
264
265
0
    if (hash->nentries > 0) {
266
0
        hash_value = hash_string(name);
267
0
        index = hash_value % hash->table_size;
268
0
        previous = NULL;
269
0
        entry = hash->table[index];
270
0
        while (entry) {
271
0
            if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
272
0
                if (entry->orig_index == -1) {
273
0
                    if (previous) {
274
0
                        previous->next = entry->next;
275
0
                    }
276
0
                    else {
277
0
                        hash->table[index] = entry->next;
278
0
                    }
279
0
                    free(entry);
280
0
                    hash->nentries--;
281
0
                    if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
282
0
                        if (!hash_resize(hash, hash->table_size / 2, error)) {
283
0
                            return false;
284
0
                        }
285
0
                    }
286
0
                }
287
0
                else {
288
0
                    entry->current_index = -1;
289
0
                }
290
0
                return true;
291
0
            }
292
0
            previous = entry;
293
0
            entry = entry->next;
294
0
        }
295
0
    }
296
297
0
    zip_error_set(error, ZIP_ER_NOENT, 0);
298
0
    return false;
299
0
}
300
301
302
/* find value for entry in hash, -1 if not found */
303
2.06k
zip_int64_t _zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) {
304
2.06k
    zip_uint32_t hash_value, index;
305
2.06k
    zip_hash_entry_t *entry;
306
307
2.06k
    if (hash == NULL || name == NULL) {
308
0
        zip_error_set(error, ZIP_ER_INVAL, 0);
309
0
        return -1;
310
0
    }
311
312
2.06k
    if (hash->nentries > 0) {
313
0
        hash_value = hash_string(name);
314
0
        index = hash_value % hash->table_size;
315
0
        for (entry = hash->table[index]; entry != NULL; entry = entry->next) {
316
0
            if (strcmp((const char *)name, (const char *)entry->name) == 0) {
317
0
                if (flags & ZIP_FL_UNCHANGED) {
318
0
                    if (entry->orig_index != -1) {
319
0
                        return entry->orig_index;
320
0
                    }
321
0
                }
322
0
                else {
323
0
                    if (entry->current_index != -1) {
324
0
                        return entry->current_index;
325
0
                    }
326
0
                }
327
0
                break;
328
0
            }
329
0
        }
330
0
    }
331
332
2.06k
    zip_error_set(error, ZIP_ER_NOENT, 0);
333
2.06k
    return -1;
334
2.06k
}
335
336
337
3.80k
bool _zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) {
338
3.80k
    zip_uint32_t new_size;
339
340
3.80k
    if (capacity == 0) {
341
539
        return true;
342
539
    }
343
344
3.26k
    new_size = size_for_capacity(capacity);
345
346
3.26k
    if (new_size <= hash->table_size) {
347
0
        return true;
348
0
    }
349
350
3.26k
    if (!hash_resize(hash, new_size, error)) {
351
0
        return false;
352
0
    }
353
354
3.26k
    return true;
355
3.26k
}
356
357
358
0
bool _zip_hash_revert(zip_hash_t *hash, zip_error_t *error) {
359
0
    zip_uint32_t i;
360
0
    zip_hash_entry_t *entry, *previous;
361
362
0
    for (i = 0; i < hash->table_size; i++) {
363
0
        previous = NULL;
364
0
        entry = hash->table[i];
365
0
        while (entry) {
366
0
            if (entry->orig_index == -1) {
367
0
                zip_hash_entry_t *p;
368
0
                if (previous) {
369
0
                    previous->next = entry->next;
370
0
                }
371
0
                else {
372
0
                    hash->table[i] = entry->next;
373
0
                }
374
0
                p = entry;
375
0
                entry = entry->next;
376
                /* previous does not change */
377
0
                free(p);
378
0
                hash->nentries--;
379
0
            }
380
0
            else {
381
0
                entry->current_index = entry->orig_index;
382
0
                previous = entry;
383
0
                entry = entry->next;
384
0
            }
385
0
        }
386
0
    }
387
388
0
    if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
389
0
        zip_uint32_t new_size = hash->table_size / 2;
390
0
        while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) {
391
0
            new_size /= 2;
392
0
        }
393
0
        if (!hash_resize(hash, new_size, error)) {
394
0
            return false;
395
0
        }
396
0
    }
397
398
0
    return true;
399
0
}