Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Generic implementation of hash-based key value mappings. |
3 | | */ |
4 | | #include "git-compat-util.h" |
5 | | #include "hashmap.h" |
6 | | |
7 | 323k | #define FNV32_BASE ((unsigned int) 0x811c9dc5) |
8 | 9.11M | #define FNV32_PRIME ((unsigned int) 0x01000193) |
9 | | |
10 | | unsigned int strhash(const char *str) |
11 | 190k | { |
12 | 190k | unsigned int c, hash = FNV32_BASE; |
13 | 3.45M | while ((c = (unsigned char) *str++)) |
14 | 3.26M | hash = (hash * FNV32_PRIME) ^ c; |
15 | 190k | return hash; |
16 | 190k | } |
17 | | |
18 | | unsigned int strihash(const char *str) |
19 | 0 | { |
20 | 0 | unsigned int c, hash = FNV32_BASE; |
21 | 0 | while ((c = (unsigned char) *str++)) { |
22 | 0 | if (c >= 'a' && c <= 'z') |
23 | 0 | c -= 'a' - 'A'; |
24 | 0 | hash = (hash * FNV32_PRIME) ^ c; |
25 | 0 | } |
26 | 0 | return hash; |
27 | 0 | } |
28 | | |
29 | | unsigned int memhash(const void *buf, size_t len) |
30 | 45.0k | { |
31 | 45.0k | unsigned int hash = FNV32_BASE; |
32 | 45.0k | unsigned char *ucbuf = (unsigned char *) buf; |
33 | 2.24M | while (len--) { |
34 | 2.20M | unsigned int c = *ucbuf++; |
35 | 2.20M | hash = (hash * FNV32_PRIME) ^ c; |
36 | 2.20M | } |
37 | 45.0k | return hash; |
38 | 45.0k | } |
39 | | |
40 | | unsigned int memihash(const void *buf, size_t len) |
41 | 87.9k | { |
42 | 87.9k | unsigned int hash = FNV32_BASE; |
43 | 87.9k | unsigned char *ucbuf = (unsigned char *) buf; |
44 | 3.74M | while (len--) { |
45 | 3.65M | unsigned int c = *ucbuf++; |
46 | 3.65M | if (c >= 'a' && c <= 'z') |
47 | 993k | c -= 'a' - 'A'; |
48 | 3.65M | hash = (hash * FNV32_PRIME) ^ c; |
49 | 3.65M | } |
50 | 87.9k | return hash; |
51 | 87.9k | } |
52 | | |
53 | | /* |
54 | | * Incorporate another chunk of data into a memihash |
55 | | * computation. |
56 | | */ |
57 | | unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len) |
58 | 0 | { |
59 | 0 | unsigned int hash = hash_seed; |
60 | 0 | unsigned char *ucbuf = (unsigned char *) buf; |
61 | 0 | while (len--) { |
62 | 0 | unsigned int c = *ucbuf++; |
63 | 0 | if (c >= 'a' && c <= 'z') |
64 | 0 | c -= 'a' - 'A'; |
65 | 0 | hash = (hash * FNV32_PRIME) ^ c; |
66 | 0 | } |
67 | 0 | return hash; |
68 | 0 | } |
69 | | |
70 | 51.2k | #define HASHMAP_INITIAL_SIZE 64 |
71 | | /* grow / shrink by 2^2 */ |
72 | 0 | #define HASHMAP_RESIZE_BITS 2 |
73 | | /* load factor in percent */ |
74 | 51.2k | #define HASHMAP_LOAD_FACTOR 80 |
75 | | |
76 | | static void alloc_table(struct hashmap *map, unsigned int size) |
77 | 25.6k | { |
78 | 25.6k | map->tablesize = size; |
79 | 25.6k | CALLOC_ARRAY(map->table, size); |
80 | | |
81 | | /* calculate resize thresholds for new size */ |
82 | 25.6k | map->grow_at = (unsigned int) ((uint64_t) size * HASHMAP_LOAD_FACTOR / 100); |
83 | 25.6k | if (size <= HASHMAP_INITIAL_SIZE) |
84 | 25.6k | map->shrink_at = 0; |
85 | 0 | else |
86 | | /* |
87 | | * The shrink-threshold must be slightly smaller than |
88 | | * (grow-threshold / resize-factor) to prevent erratic resizing, |
89 | | * thus we divide by (resize-factor + 1). |
90 | | */ |
91 | 0 | map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1); |
92 | 25.6k | } |
93 | | |
94 | | static inline int entry_equals(const struct hashmap *map, |
95 | | const struct hashmap_entry *e1, |
96 | | const struct hashmap_entry *e2, |
97 | | const void *keydata) |
98 | 86.5k | { |
99 | 86.5k | return (e1 == e2) || |
100 | 86.5k | (e1->hash == e2->hash && |
101 | 86.5k | !map->cmpfn(map->cmpfn_data, e1, e2, keydata)); |
102 | 86.5k | } |
103 | | |
104 | | static inline unsigned int bucket(const struct hashmap *map, |
105 | | const struct hashmap_entry *key) |
106 | 463k | { |
107 | 463k | return key->hash & (map->tablesize - 1); |
108 | 463k | } |
109 | | |
110 | | int hashmap_bucket(const struct hashmap *map, unsigned int hash) |
111 | 0 | { |
112 | 0 | return hash & (map->tablesize - 1); |
113 | 0 | } |
114 | | |
115 | | static void rehash(struct hashmap *map, unsigned int newsize) |
116 | 0 | { |
117 | | /* map->table MUST NOT be NULL when this function is called */ |
118 | 0 | unsigned int i, oldsize = map->tablesize; |
119 | 0 | struct hashmap_entry **oldtable = map->table; |
120 | |
|
121 | 0 | alloc_table(map, newsize); |
122 | 0 | for (i = 0; i < oldsize; i++) { |
123 | 0 | struct hashmap_entry *e = oldtable[i]; |
124 | 0 | while (e) { |
125 | 0 | struct hashmap_entry *next = e->next; |
126 | 0 | unsigned int b = bucket(map, e); |
127 | 0 | e->next = map->table[b]; |
128 | 0 | map->table[b] = e; |
129 | 0 | e = next; |
130 | 0 | } |
131 | 0 | } |
132 | 0 | free(oldtable); |
133 | 0 | } |
134 | | |
135 | | static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map, |
136 | | const struct hashmap_entry *key, const void *keydata) |
137 | 409k | { |
138 | | /* map->table MUST NOT be NULL when this function is called */ |
139 | 409k | struct hashmap_entry **e = &map->table[bucket(map, key)]; |
140 | 416k | while (*e && !entry_equals(map, *e, key, keydata)) |
141 | 6.84k | e = &(*e)->next; |
142 | 409k | return e; |
143 | 409k | } |
144 | | |
145 | | static int always_equal(const void *cmp_data UNUSED, |
146 | | const struct hashmap_entry *entry1 UNUSED, |
147 | | const struct hashmap_entry *entry2 UNUSED, |
148 | | const void *keydata UNUSED) |
149 | 0 | { |
150 | 0 | return 0; |
151 | 0 | } |
152 | | |
153 | | void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, |
154 | | const void *cmpfn_data, size_t initial_size) |
155 | 25.6k | { |
156 | 25.6k | unsigned int size = HASHMAP_INITIAL_SIZE; |
157 | | |
158 | 25.6k | memset(map, 0, sizeof(*map)); |
159 | | |
160 | 25.6k | map->cmpfn = equals_function ? equals_function : always_equal; |
161 | 25.6k | map->cmpfn_data = cmpfn_data; |
162 | | |
163 | | /* calculate initial table size and allocate the table */ |
164 | 25.6k | initial_size = (unsigned int) ((uint64_t) initial_size * 100 |
165 | 25.6k | / HASHMAP_LOAD_FACTOR); |
166 | 25.6k | while (initial_size > size) |
167 | 0 | size <<= HASHMAP_RESIZE_BITS; |
168 | 25.6k | alloc_table(map, size); |
169 | | |
170 | | /* |
171 | | * Keep track of the number of items in the map and |
172 | | * allow the map to automatically grow as necessary. |
173 | | */ |
174 | 25.6k | map->do_count_items = 1; |
175 | 25.6k | } |
176 | | |
177 | | static void free_individual_entries(struct hashmap *map, ssize_t entry_offset) |
178 | 21.2k | { |
179 | 21.2k | struct hashmap_iter iter; |
180 | 21.2k | struct hashmap_entry *e; |
181 | | |
182 | 21.2k | hashmap_iter_init(map, &iter); |
183 | 63.4k | while ((e = hashmap_iter_next(&iter))) |
184 | | /* |
185 | | * like container_of, but using caller-calculated |
186 | | * offset (caller being hashmap_clear_and_free) |
187 | | */ |
188 | 42.2k | free((char *)e - entry_offset); |
189 | 21.2k | } |
190 | | |
191 | | void hashmap_partial_clear_(struct hashmap *map, ssize_t entry_offset) |
192 | 0 | { |
193 | 0 | if (!map || !map->table) |
194 | 0 | return; |
195 | 0 | if (entry_offset >= 0) /* called by hashmap_clear_entries */ |
196 | 0 | free_individual_entries(map, entry_offset); |
197 | 0 | memset(map->table, 0, map->tablesize * sizeof(struct hashmap_entry *)); |
198 | 0 | map->shrink_at = 0; |
199 | 0 | map->private_size = 0; |
200 | 0 | } |
201 | | |
202 | | void hashmap_clear_(struct hashmap *map, ssize_t entry_offset) |
203 | 49.1k | { |
204 | 49.1k | if (!map || !map->table) |
205 | 23.5k | return; |
206 | 25.6k | if (entry_offset >= 0) /* called by hashmap_clear_and_free */ |
207 | 21.2k | free_individual_entries(map, entry_offset); |
208 | 25.6k | free(map->table); |
209 | 25.6k | memset(map, 0, sizeof(*map)); |
210 | 25.6k | } |
211 | | |
212 | | struct hashmap_entry *hashmap_get(const struct hashmap *map, |
213 | | const struct hashmap_entry *key, |
214 | | const void *keydata) |
215 | 408k | { |
216 | 408k | if (!map->table) |
217 | 1 | return NULL; |
218 | 408k | return *find_entry_ptr(map, key, keydata); |
219 | 408k | } |
220 | | |
221 | | struct hashmap_entry *hashmap_get_next(const struct hashmap *map, |
222 | | const struct hashmap_entry *entry) |
223 | 0 | { |
224 | 0 | struct hashmap_entry *e = entry->next; |
225 | 0 | for (; e; e = e->next) |
226 | 0 | if (entry_equals(map, entry, e, NULL)) |
227 | 0 | return e; |
228 | 0 | return NULL; |
229 | 0 | } |
230 | | |
231 | | void hashmap_add(struct hashmap *map, struct hashmap_entry *entry) |
232 | 54.2k | { |
233 | 54.2k | unsigned int b; |
234 | | |
235 | 54.2k | if (!map->table) |
236 | 1 | alloc_table(map, HASHMAP_INITIAL_SIZE); |
237 | | |
238 | 54.2k | b = bucket(map, entry); |
239 | | /* add entry */ |
240 | 54.2k | entry->next = map->table[b]; |
241 | 54.2k | map->table[b] = entry; |
242 | | |
243 | | /* fix size and rehash if appropriate */ |
244 | 54.2k | if (map->do_count_items) { |
245 | 54.2k | map->private_size++; |
246 | 54.2k | if (map->private_size > map->grow_at) |
247 | 0 | rehash(map, map->tablesize << HASHMAP_RESIZE_BITS); |
248 | 54.2k | } |
249 | 54.2k | } |
250 | | |
251 | | struct hashmap_entry *hashmap_remove(struct hashmap *map, |
252 | | const struct hashmap_entry *key, |
253 | | const void *keydata) |
254 | 1.46k | { |
255 | 1.46k | struct hashmap_entry *old; |
256 | 1.46k | struct hashmap_entry **e; |
257 | | |
258 | 1.46k | if (!map->table) |
259 | 0 | return NULL; |
260 | 1.46k | e = find_entry_ptr(map, key, keydata); |
261 | 1.46k | if (!*e) |
262 | 1.46k | return NULL; |
263 | | |
264 | | /* remove existing entry */ |
265 | 0 | old = *e; |
266 | 0 | *e = old->next; |
267 | 0 | old->next = NULL; |
268 | | |
269 | | /* fix size and rehash if appropriate */ |
270 | 0 | if (map->do_count_items) { |
271 | 0 | map->private_size--; |
272 | 0 | if (map->private_size < map->shrink_at) |
273 | 0 | rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS); |
274 | 0 | } |
275 | |
|
276 | 0 | return old; |
277 | 1.46k | } |
278 | | |
279 | | struct hashmap_entry *hashmap_put(struct hashmap *map, |
280 | | struct hashmap_entry *entry) |
281 | 1.46k | { |
282 | 1.46k | struct hashmap_entry *old = hashmap_remove(map, entry, NULL); |
283 | 1.46k | hashmap_add(map, entry); |
284 | 1.46k | return old; |
285 | 1.46k | } |
286 | | |
287 | | void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter) |
288 | 65.9k | { |
289 | 65.9k | iter->map = map; |
290 | 65.9k | iter->tablepos = 0; |
291 | 65.9k | iter->next = NULL; |
292 | 65.9k | } |
293 | | |
294 | | struct hashmap_entry *hashmap_iter_next(struct hashmap_iter *iter) |
295 | 152k | { |
296 | 152k | struct hashmap_entry *current = iter->next; |
297 | 2.68M | for (;;) { |
298 | 2.68M | if (current) { |
299 | 86.0k | iter->next = current->next; |
300 | 86.0k | return current; |
301 | 86.0k | } |
302 | | |
303 | 2.59M | if (iter->tablepos >= iter->map->tablesize) |
304 | 65.9k | return NULL; |
305 | | |
306 | 2.52M | current = iter->map->table[iter->tablepos++]; |
307 | 2.52M | } |
308 | 152k | } |
309 | | |
310 | | struct pool_entry { |
311 | | struct hashmap_entry ent; |
312 | | size_t len; |
313 | | unsigned char data[FLEX_ARRAY]; |
314 | | }; |
315 | | |
316 | | static int pool_entry_cmp(const void *cmp_data UNUSED, |
317 | | const struct hashmap_entry *eptr, |
318 | | const struct hashmap_entry *entry_or_key, |
319 | | const void *keydata) |
320 | 42.0k | { |
321 | 42.0k | const struct pool_entry *e1, *e2; |
322 | | |
323 | 42.0k | e1 = container_of(eptr, const struct pool_entry, ent); |
324 | 42.0k | e2 = container_of(entry_or_key, const struct pool_entry, ent); |
325 | | |
326 | 42.0k | return e1->data != keydata && |
327 | 42.0k | (e1->len != e2->len || memcmp(e1->data, keydata, e1->len)); |
328 | 42.0k | } |
329 | | |
330 | | const void *memintern(const void *data, size_t len) |
331 | 42.0k | { |
332 | 42.0k | static struct hashmap map; |
333 | 42.0k | struct pool_entry key, *e; |
334 | | |
335 | | /* initialize string pool hashmap */ |
336 | 42.0k | if (!map.tablesize) |
337 | 1 | hashmap_init(&map, pool_entry_cmp, NULL, 0); |
338 | | |
339 | | /* lookup interned string in pool */ |
340 | 42.0k | hashmap_entry_init(&key.ent, memhash(data, len)); |
341 | 42.0k | key.len = len; |
342 | 42.0k | e = hashmap_get_entry(&map, &key, ent, data); |
343 | 42.0k | if (!e) { |
344 | | /* not found: create it */ |
345 | 1 | FLEX_ALLOC_MEM(e, data, data, len); |
346 | 1 | hashmap_entry_init(&e->ent, key.ent.hash); |
347 | 1 | e->len = len; |
348 | 1 | hashmap_add(&map, &e->ent); |
349 | 1 | } |
350 | 42.0k | return e->data; |
351 | 42.0k | } |