/src/gettext-0.26/gettext-tools/libgettextpo/mem-hash-map.c
Line | Count | Source |
1 | | /* Simple hash table (no removals) where the keys are memory blocks. |
2 | | Copyright (C) 1994-2025 Free Software Foundation, Inc. |
3 | | Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. |
4 | | |
5 | | This file is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published |
7 | | by the Free Software Foundation, either version 3 of the License, |
8 | | or (at your option) any later version. |
9 | | |
10 | | This file is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
17 | | |
18 | | #include <config.h> |
19 | | |
20 | | /* Specification. */ |
21 | | #include "mem-hash-map.h" |
22 | | |
23 | | #include <stdlib.h> |
24 | | #include <string.h> |
25 | | #include <stdio.h> |
26 | | #include <limits.h> |
27 | | #include <sys/types.h> |
28 | | |
29 | | #include "next-prime.h" |
30 | | |
31 | | /* Since this simple implementation of hash tables allows only insertion, no |
32 | | removal of entries, the right data structure for the memory holding all keys |
33 | | is an obstack. */ |
34 | | #include "obstack.h" |
35 | | |
36 | | /* Use checked memory allocation. */ |
37 | | #include "xalloc.h" |
38 | | |
39 | | #define obstack_chunk_alloc xmalloc |
40 | | #define obstack_chunk_free free |
41 | | |
42 | | |
43 | | typedef struct hash_entry |
44 | | { |
45 | | size_t used; /* Hash code of the key, or 0 for an unused entry. */ |
46 | | const void *key; /* Key. */ |
47 | | size_t keylen; |
48 | | void *data; /* Value. */ |
49 | | struct hash_entry *next; |
50 | | } |
51 | | hash_entry; |
52 | | |
53 | | |
54 | | /* Initialize a hash table. INIT_SIZE > 1 is the initial number of available |
55 | | entries. |
56 | | Return 0 always. */ |
57 | | int |
58 | | hash_init (hash_table *htab, size_t init_size) |
59 | 10.5k | { |
60 | | /* We need the size to be a prime. */ |
61 | 10.5k | init_size = next_prime (init_size); |
62 | | |
63 | | /* Initialize the data structure. */ |
64 | 10.5k | htab->size = init_size; |
65 | 10.5k | htab->filled = 0; |
66 | 10.5k | htab->first = NULL; |
67 | 10.5k | htab->table = XCALLOC (init_size + 1, hash_entry); |
68 | | |
69 | 10.5k | obstack_init (&htab->mem_pool); |
70 | | |
71 | 10.5k | return 0; |
72 | 10.5k | } |
73 | | |
74 | | |
75 | | /* Delete a hash table's contents. |
76 | | Return 0 always. */ |
77 | | int |
78 | | hash_destroy (hash_table *htab) |
79 | 10.5k | { |
80 | 10.5k | free (htab->table); |
81 | 10.5k | obstack_free (&htab->mem_pool, NULL); |
82 | 10.5k | return 0; |
83 | 10.5k | } |
84 | | |
85 | | |
86 | | /* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY |
87 | | in memory. */ |
88 | | static size_t |
89 | | compute_hashval (const void *key, size_t keylen) |
90 | 84.1k | { |
91 | 84.1k | size_t cnt; |
92 | 84.1k | size_t hval; |
93 | | |
94 | | /* Compute the hash value for the given string. The algorithm |
95 | | is taken from [Aho,Sethi,Ullman], fixed according to |
96 | | https://haible.de/bruno/hashfunc.html. */ |
97 | 84.1k | cnt = 0; |
98 | 84.1k | hval = keylen; |
99 | 4.73M | while (cnt < keylen) |
100 | 4.65M | { |
101 | 4.65M | hval = (hval << 9) | (hval >> (sizeof (size_t) * CHAR_BIT - 9)); |
102 | 4.65M | hval += (size_t) *(((const char *) key) + cnt++); |
103 | 4.65M | } |
104 | 84.1k | return hval != 0 ? hval : ~((size_t) 0); |
105 | 84.1k | } |
106 | | |
107 | | |
108 | | /* References: |
109 | | [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 |
110 | | [Knuth] The Art of Computer Programming, part3 (6.4) */ |
111 | | |
112 | | /* Look up a given key in the hash table. |
113 | | Return the index of the entry, if present, or otherwise the index a free |
114 | | entry where it could be inserted. */ |
115 | | static size_t |
116 | | lookup (const hash_table *htab, |
117 | | const void *key, size_t keylen, |
118 | | size_t hval) |
119 | 94.6k | { |
120 | 94.6k | size_t hash; |
121 | 94.6k | size_t idx; |
122 | 94.6k | hash_entry *table = htab->table; |
123 | | |
124 | | /* First hash function: simply take the modul but prevent zero. */ |
125 | 94.6k | hash = 1 + hval % htab->size; |
126 | | |
127 | 94.6k | idx = hash; |
128 | | |
129 | 94.6k | if (table[idx].used) |
130 | 59.8k | { |
131 | 59.8k | if (table[idx].used == hval && table[idx].keylen == keylen |
132 | 40.0k | && memcmp (table[idx].key, key, keylen) == 0) |
133 | 39.8k | return idx; |
134 | | |
135 | | /* Second hash function as suggested in [Knuth]. */ |
136 | 20.0k | hash = 1 + hval % (htab->size - 2); |
137 | | |
138 | 20.0k | do |
139 | 34.1k | { |
140 | 34.1k | if (idx <= hash) |
141 | 18.2k | idx = htab->size + idx - hash; |
142 | 15.9k | else |
143 | 15.9k | idx -= hash; |
144 | | |
145 | | /* If entry is found use it. */ |
146 | 34.1k | if (table[idx].used == hval && table[idx].keylen == keylen |
147 | 6.97k | && memcmp (table[idx].key, key, keylen) == 0) |
148 | 6.82k | return idx; |
149 | 34.1k | } |
150 | 27.3k | while (table[idx].used); |
151 | 20.0k | } |
152 | 48.0k | return idx; |
153 | 94.6k | } |
154 | | |
155 | | |
156 | | /* Look up the value of a key in the given table. |
157 | | If found, return 0 and set *RESULT to it. Otherwise return -1. */ |
158 | | int |
159 | | hash_find_entry (const hash_table *htab, const void *key, size_t keylen, |
160 | | void **result) |
161 | 65.4k | { |
162 | 65.4k | hash_entry *table = htab->table; |
163 | 65.4k | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
164 | | |
165 | 65.4k | if (table[idx].used == 0) |
166 | 18.7k | return -1; |
167 | | |
168 | 46.6k | *result = table[idx].data; |
169 | 46.6k | return 0; |
170 | 65.4k | } |
171 | | |
172 | | |
173 | | /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX. |
174 | | HVAL is the key's hash code. IDX depends on it. The table entry at index |
175 | | IDX is known to be unused. */ |
176 | | static void |
177 | | insert_entry_2 (hash_table *htab, |
178 | | const void *key, size_t keylen, |
179 | | size_t hval, size_t idx, void *data) |
180 | 29.2k | { |
181 | 29.2k | hash_entry *table = htab->table; |
182 | | |
183 | 29.2k | table[idx].used = hval; |
184 | 29.2k | table[idx].key = key; |
185 | 29.2k | table[idx].keylen = keylen; |
186 | 29.2k | table[idx].data = data; |
187 | | |
188 | | /* List the new value in the list. */ |
189 | 29.2k | if (htab->first == NULL) |
190 | 6.39k | { |
191 | 6.39k | table[idx].next = &table[idx]; |
192 | 6.39k | htab->first = &table[idx]; |
193 | 6.39k | } |
194 | 22.8k | else |
195 | 22.8k | { |
196 | 22.8k | table[idx].next = htab->first->next; |
197 | 22.8k | htab->first->next = &table[idx]; |
198 | 22.8k | htab->first = &table[idx]; |
199 | 22.8k | } |
200 | | |
201 | 29.2k | ++htab->filled; |
202 | 29.2k | } |
203 | | |
204 | | |
205 | | /* Grow the hash table. */ |
206 | | static void |
207 | | resize (hash_table *htab) |
208 | 966 | { |
209 | 966 | size_t old_size = htab->size; |
210 | 966 | hash_entry *table = htab->table; |
211 | 966 | size_t idx; |
212 | | |
213 | 966 | htab->size = next_prime (htab->size * 2); |
214 | 966 | htab->filled = 0; |
215 | 966 | htab->first = NULL; |
216 | 966 | htab->table = XCALLOC (1 + htab->size, hash_entry); |
217 | | |
218 | 13.9k | for (idx = 1; idx <= old_size; ++idx) |
219 | 13.0k | if (table[idx].used) |
220 | 10.4k | insert_entry_2 (htab, table[idx].key, table[idx].keylen, |
221 | 10.4k | table[idx].used, |
222 | 10.4k | lookup (htab, table[idx].key, table[idx].keylen, |
223 | 10.4k | table[idx].used), |
224 | 10.4k | table[idx].data); |
225 | | |
226 | 966 | free (table); |
227 | 966 | } |
228 | | |
229 | | |
230 | | /* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. |
231 | | Return non-NULL (more precisely, the address of the KEY inside the table's |
232 | | memory pool) if successful, or NULL if there is already an entry with the |
233 | | given key. */ |
234 | | const void * |
235 | | hash_insert_entry (hash_table *htab, |
236 | | const void *key, size_t keylen, |
237 | | void *data) |
238 | 18.7k | { |
239 | 18.7k | size_t hval = compute_hashval (key, keylen); |
240 | 18.7k | hash_entry *table = htab->table; |
241 | 18.7k | size_t idx = lookup (htab, key, keylen, hval); |
242 | | |
243 | 18.7k | if (table[idx].used) |
244 | | /* We don't want to overwrite the old value. */ |
245 | 0 | return NULL; |
246 | 18.7k | else |
247 | 18.7k | { |
248 | | /* An empty bucket has been found. */ |
249 | 18.7k | void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); |
250 | 18.7k | insert_entry_2 (htab, keycopy, keylen, hval, idx, data); |
251 | 18.7k | if (100 * htab->filled > 75 * htab->size) |
252 | | /* Table is filled more than 75%. Resize the table. */ |
253 | 966 | resize (htab); |
254 | 18.7k | return keycopy; |
255 | 18.7k | } |
256 | 18.7k | } |
257 | | |
258 | | |
259 | | /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. |
260 | | Return 0. */ |
261 | | int |
262 | | hash_set_value (hash_table *htab, |
263 | | const void *key, size_t keylen, |
264 | | void *data) |
265 | 0 | { |
266 | 0 | size_t hval = compute_hashval (key, keylen); |
267 | 0 | hash_entry *table = htab->table; |
268 | 0 | size_t idx = lookup (htab, key, keylen, hval); |
269 | |
|
270 | 0 | if (table[idx].used) |
271 | 0 | { |
272 | | /* Overwrite the old value. */ |
273 | 0 | table[idx].data = data; |
274 | 0 | return 0; |
275 | 0 | } |
276 | 0 | else |
277 | 0 | { |
278 | | /* An empty bucket has been found. */ |
279 | 0 | void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); |
280 | 0 | insert_entry_2 (htab, keycopy, keylen, hval, idx, data); |
281 | 0 | if (100 * htab->filled > 75 * htab->size) |
282 | | /* Table is filled more than 75%. Resize the table. */ |
283 | 0 | resize (htab); |
284 | 0 | return 0; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | |
289 | | /* Steps *PTR forward to the next used entry in the given hash table. *PTR |
290 | | should be initially set to NULL. Store information about the next entry |
291 | | in *KEY, *KEYLEN, *DATA. |
292 | | Return 0 normally, -1 when the whole hash table has been traversed. */ |
293 | | int |
294 | | hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen, |
295 | | void **data) |
296 | 0 | { |
297 | 0 | hash_entry *curr; |
298 | |
|
299 | 0 | if (*ptr == NULL) |
300 | 0 | { |
301 | 0 | if (htab->first == NULL) |
302 | 0 | return -1; |
303 | 0 | curr = htab->first; |
304 | 0 | } |
305 | 0 | else |
306 | 0 | { |
307 | 0 | if (*ptr == htab->first) |
308 | 0 | return -1; |
309 | 0 | curr = (hash_entry *) *ptr; |
310 | 0 | } |
311 | 0 | curr = curr->next; |
312 | 0 | *ptr = (void *) curr; |
313 | |
|
314 | 0 | *key = curr->key; |
315 | 0 | *keylen = curr->keylen; |
316 | 0 | *data = curr->data; |
317 | 0 | return 0; |
318 | 0 | } |
319 | | |
320 | | |
321 | | /* Steps *PTR forward to the next used entry in the given hash table. *PTR |
322 | | should be initially set to NULL. Store information about the next entry |
323 | | in *KEY, *KEYLEN, *DATAP. *DATAP is set to point to the storage of the |
324 | | value; modifying **DATAP will modify the value of the entry. |
325 | | Return 0 normally, -1 when the whole hash table has been traversed. */ |
326 | | int |
327 | | hash_iterate_modify (hash_table *htab, void **ptr, |
328 | | const void **key, size_t *keylen, |
329 | | void ***datap) |
330 | 0 | { |
331 | 0 | hash_entry *curr; |
332 | |
|
333 | 0 | if (*ptr == NULL) |
334 | 0 | { |
335 | 0 | if (htab->first == NULL) |
336 | 0 | return -1; |
337 | 0 | curr = htab->first; |
338 | 0 | } |
339 | 0 | else |
340 | 0 | { |
341 | 0 | if (*ptr == htab->first) |
342 | 0 | return -1; |
343 | 0 | curr = (hash_entry *) *ptr; |
344 | 0 | } |
345 | 0 | curr = curr->next; |
346 | 0 | *ptr = (void *) curr; |
347 | |
|
348 | 0 | *key = curr->key; |
349 | 0 | *keylen = curr->keylen; |
350 | 0 | *datap = &curr->data; |
351 | 0 | return 0; |
352 | 0 | } |