/src/gettext/gettext-tools/libgettextpo/mem-hash-map.c
Line | Count | Source |
1 | | /* Simple hash table (no removals) where the keys are memory blocks. |
2 | | Copyright (C) 1994-2026 Free Software Foundation, Inc. |
3 | | Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. |
4 | | |
5 | | This file is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published |
7 | | by the Free Software Foundation, either version 3 of the License, |
8 | | or (at your option) any later version. |
9 | | |
10 | | This file is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
17 | | |
18 | | #include <config.h> |
19 | | |
20 | | /* Specification. */ |
21 | | #include "mem-hash-map.h" |
22 | | |
23 | | #include <stdlib.h> |
24 | | #include <string.h> |
25 | | #include <stdio.h> |
26 | | #include <limits.h> |
27 | | #include <sys/types.h> |
28 | | |
29 | | #include "next-prime.h" |
30 | | |
31 | | /* Since this simple implementation of hash tables allows only insertion, no |
32 | | removal of entries, the right data structure for the memory holding all keys |
33 | | is an obstack. */ |
34 | | #include "obstack.h" |
35 | | |
36 | | /* Use checked memory allocation. */ |
37 | | #include "xalloc.h" |
38 | | |
39 | | #define obstack_chunk_alloc xmalloc |
40 | | #define obstack_chunk_free free |
41 | | |
42 | | |
43 | | typedef struct hash_entry |
44 | | { |
45 | | size_t used; /* Hash code of the key, or 0 for an unused entry. */ |
46 | | const void *key; /* Key. */ |
47 | | size_t keylen; |
48 | | void *data; /* Value. */ |
49 | | struct hash_entry *next; |
50 | | } |
51 | | hash_entry; |
52 | | |
53 | | |
54 | | /* Initialize a hash table. INIT_SIZE > 1 is the initial number of available |
55 | | entries. |
56 | | Return 0 always. */ |
57 | | int |
58 | | hash_init (hash_table *htab, size_t init_size) |
59 | 13.5k | { |
60 | | /* We need the size to be a prime. */ |
61 | 13.5k | init_size = next_prime (init_size); |
62 | | |
63 | | /* Initialize the data structure. */ |
64 | 13.5k | htab->size = init_size; |
65 | 13.5k | htab->filled = 0; |
66 | 13.5k | htab->first = NULL; |
67 | 13.5k | htab->table = XCALLOC (init_size + 1, hash_entry); |
68 | | |
69 | 13.5k | obstack_init (&htab->mem_pool); |
70 | | |
71 | 13.5k | return 0; |
72 | 13.5k | } |
73 | | |
74 | | |
75 | | /* Delete a hash table's contents. |
76 | | Return 0 always. */ |
77 | | int |
78 | | hash_destroy (hash_table *htab) |
79 | 13.5k | { |
80 | 13.5k | free (htab->table); |
81 | 13.5k | obstack_free (&htab->mem_pool, NULL); |
82 | 13.5k | return 0; |
83 | 13.5k | } |
84 | | |
85 | | |
86 | | /* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY |
87 | | in memory. */ |
88 | | static size_t |
89 | | compute_hashval (const void *key, size_t keylen) |
90 | 105k | { |
91 | | /* Compute the hash value for the given string. The algorithm |
92 | | is taken from [Aho,Sethi,Ullman], fixed according to |
93 | | https://haible.de/bruno/hashfunc.html. */ |
94 | 105k | size_t cnt = 0; |
95 | 105k | size_t hval = keylen; |
96 | 16.1M | while (cnt < keylen) |
97 | 16.0M | { |
98 | 16.0M | hval = (hval << 9) | (hval >> (sizeof (size_t) * CHAR_BIT - 9)); |
99 | 16.0M | hval += (size_t) *(((const char *) key) + cnt++); |
100 | 16.0M | } |
101 | 105k | return hval != 0 ? hval : ~((size_t) 0); |
102 | 105k | } |
103 | | |
104 | | |
105 | | /* References: |
106 | | [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 |
107 | | [Knuth] The Art of Computer Programming, part3 (6.4) */ |
108 | | |
109 | | /* Look up a given key in the hash table. |
110 | | Return the index of the entry, if present, or otherwise the index a free |
111 | | entry where it could be inserted. */ |
112 | | static size_t |
113 | | lookup (const hash_table *htab, |
114 | | const void *key, size_t keylen, |
115 | | size_t hval) |
116 | 128k | { |
117 | 128k | hash_entry *table = htab->table; |
118 | | |
119 | | /* First hash function: simply take the modul but prevent zero. */ |
120 | 128k | size_t hash = 1 + hval % htab->size; |
121 | | |
122 | 128k | size_t idx = hash; |
123 | | |
124 | 128k | if (table[idx].used) |
125 | 73.4k | { |
126 | 73.4k | if (table[idx].used == hval && table[idx].keylen == keylen |
127 | 39.4k | && memeq (table[idx].key, key, keylen)) |
128 | 38.6k | return idx; |
129 | | |
130 | | /* Second hash function as suggested in [Knuth]. */ |
131 | 34.8k | hash = 1 + hval % (htab->size - 2); |
132 | | |
133 | 34.8k | do |
134 | 63.3k | { |
135 | 63.3k | if (idx <= hash) |
136 | 33.3k | idx = htab->size + idx - hash; |
137 | 30.0k | else |
138 | 30.0k | idx -= hash; |
139 | | |
140 | | /* If entry is found use it. */ |
141 | 63.3k | if (table[idx].used == hval && table[idx].keylen == keylen |
142 | 10.2k | && memeq (table[idx].key, key, keylen)) |
143 | 9.68k | return idx; |
144 | 63.3k | } |
145 | 53.7k | while (table[idx].used); |
146 | 34.8k | } |
147 | 79.6k | return idx; |
148 | 128k | } |
149 | | |
150 | | |
151 | | /* Look up the value of a key in the given table. |
152 | | If found, return 0 and set *RESULT to it. Otherwise return -1. */ |
153 | | int |
154 | | hash_find_entry (const hash_table *htab, const void *key, size_t keylen, |
155 | | void **result) |
156 | 76.7k | { |
157 | 76.7k | hash_entry *table = htab->table; |
158 | 76.7k | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
159 | | |
160 | 76.7k | if (table[idx].used == 0) |
161 | 28.3k | return -1; |
162 | | |
163 | 48.3k | *result = table[idx].data; |
164 | 48.3k | return 0; |
165 | 76.7k | } |
166 | | |
167 | | |
168 | | /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX. |
169 | | HVAL is the key's hash code. IDX depends on it. The table entry at index |
170 | | IDX is known to be unused. */ |
171 | | static void |
172 | | insert_entry_2 (hash_table *htab, |
173 | | const void *key, size_t keylen, |
174 | | size_t hval, size_t idx, void *data) |
175 | 51.3k | { |
176 | 51.3k | hash_entry *table = htab->table; |
177 | | |
178 | 51.3k | table[idx].used = hval; |
179 | 51.3k | table[idx].key = key; |
180 | 51.3k | table[idx].keylen = keylen; |
181 | 51.3k | table[idx].data = data; |
182 | | |
183 | | /* List the new value in the list. */ |
184 | 51.3k | if (htab->first == NULL) |
185 | 7.95k | { |
186 | 7.95k | table[idx].next = &table[idx]; |
187 | 7.95k | htab->first = &table[idx]; |
188 | 7.95k | } |
189 | 43.3k | else |
190 | 43.3k | { |
191 | 43.3k | table[idx].next = htab->first->next; |
192 | 43.3k | htab->first->next = &table[idx]; |
193 | 43.3k | htab->first = &table[idx]; |
194 | 43.3k | } |
195 | | |
196 | 51.3k | ++htab->filled; |
197 | 51.3k | } |
198 | | |
199 | | |
200 | | /* Grow the hash table. */ |
201 | | static void |
202 | | resize (hash_table *htab) |
203 | 1.74k | { |
204 | 1.74k | size_t old_size = htab->size; |
205 | 1.74k | hash_entry *table = htab->table; |
206 | | |
207 | 1.74k | htab->size = next_prime (htab->size * 2); |
208 | 1.74k | htab->filled = 0; |
209 | 1.74k | htab->first = NULL; |
210 | 1.74k | htab->table = XCALLOC (1 + htab->size, hash_entry); |
211 | | |
212 | 30.5k | for (size_t idx = 1; idx <= old_size; ++idx) |
213 | 28.8k | if (table[idx].used) |
214 | 22.9k | insert_entry_2 (htab, table[idx].key, table[idx].keylen, |
215 | 22.9k | table[idx].used, |
216 | 22.9k | lookup (htab, table[idx].key, table[idx].keylen, |
217 | 22.9k | table[idx].used), |
218 | 22.9k | table[idx].data); |
219 | | |
220 | 1.74k | free (table); |
221 | 1.74k | } |
222 | | |
223 | | |
224 | | /* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. |
225 | | Return non-NULL (more precisely, the address of the KEY inside the table's |
226 | | memory pool) if successful, or NULL if there is already an entry with the |
227 | | given key. */ |
228 | | const void * |
229 | | hash_insert_entry (hash_table *htab, |
230 | | const void *key, size_t keylen, |
231 | | void *data) |
232 | 28.3k | { |
233 | 28.3k | size_t hval = compute_hashval (key, keylen); |
234 | 28.3k | hash_entry *table = htab->table; |
235 | 28.3k | size_t idx = lookup (htab, key, keylen, hval); |
236 | | |
237 | 28.3k | if (table[idx].used) |
238 | | /* We don't want to overwrite the old value. */ |
239 | 0 | return NULL; |
240 | 28.3k | else |
241 | 28.3k | { |
242 | | /* An empty bucket has been found. */ |
243 | 28.3k | void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); |
244 | 28.3k | insert_entry_2 (htab, keycopy, keylen, hval, idx, data); |
245 | 28.3k | if (100 * htab->filled > 75 * htab->size) |
246 | | /* Table is filled more than 75%. Resize the table. */ |
247 | 1.74k | resize (htab); |
248 | 28.3k | return keycopy; |
249 | 28.3k | } |
250 | 28.3k | } |
251 | | |
252 | | |
253 | | /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. |
254 | | Return 0. */ |
255 | | int |
256 | | hash_set_value (hash_table *htab, |
257 | | const void *key, size_t keylen, |
258 | | void *data) |
259 | 0 | { |
260 | 0 | size_t hval = compute_hashval (key, keylen); |
261 | 0 | hash_entry *table = htab->table; |
262 | 0 | size_t idx = lookup (htab, key, keylen, hval); |
263 | |
|
264 | 0 | if (table[idx].used) |
265 | 0 | { |
266 | | /* Overwrite the old value. */ |
267 | 0 | table[idx].data = data; |
268 | 0 | return 0; |
269 | 0 | } |
270 | 0 | else |
271 | 0 | { |
272 | | /* An empty bucket has been found. */ |
273 | 0 | void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); |
274 | 0 | insert_entry_2 (htab, keycopy, keylen, hval, idx, data); |
275 | 0 | if (100 * htab->filled > 75 * htab->size) |
276 | | /* Table is filled more than 75%. Resize the table. */ |
277 | 0 | resize (htab); |
278 | 0 | return 0; |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | | |
283 | | /* Steps *PTR forward to the next used entry in the given hash table. *PTR |
284 | | should be initially set to NULL. Store information about the next entry |
285 | | in *KEY, *KEYLEN, *DATA. |
286 | | Return 0 normally, -1 when the whole hash table has been traversed. */ |
287 | | int |
288 | | hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen, |
289 | | void **data) |
290 | 0 | { |
291 | 0 | hash_entry *curr; |
292 | |
|
293 | 0 | if (*ptr == NULL) |
294 | 0 | { |
295 | 0 | if (htab->first == NULL) |
296 | 0 | return -1; |
297 | 0 | curr = htab->first; |
298 | 0 | } |
299 | 0 | else |
300 | 0 | { |
301 | 0 | if (*ptr == htab->first) |
302 | 0 | return -1; |
303 | 0 | curr = (hash_entry *) *ptr; |
304 | 0 | } |
305 | 0 | curr = curr->next; |
306 | 0 | *ptr = (void *) curr; |
307 | |
|
308 | 0 | *key = curr->key; |
309 | 0 | *keylen = curr->keylen; |
310 | 0 | *data = curr->data; |
311 | 0 | return 0; |
312 | 0 | } |
313 | | |
314 | | |
315 | | /* Steps *PTR forward to the next used entry in the given hash table. *PTR |
316 | | should be initially set to NULL. Store information about the next entry |
317 | | in *KEY, *KEYLEN, *DATAP. *DATAP is set to point to the storage of the |
318 | | value; modifying **DATAP will modify the value of the entry. |
319 | | Return 0 normally, -1 when the whole hash table has been traversed. */ |
320 | | int |
321 | | hash_iterate_modify (hash_table *htab, void **ptr, |
322 | | const void **key, size_t *keylen, |
323 | | void ***datap) |
324 | 0 | { |
325 | 0 | hash_entry *curr; |
326 | |
|
327 | 0 | if (*ptr == NULL) |
328 | 0 | { |
329 | 0 | if (htab->first == NULL) |
330 | 0 | return -1; |
331 | 0 | curr = htab->first; |
332 | 0 | } |
333 | 0 | else |
334 | 0 | { |
335 | 0 | if (*ptr == htab->first) |
336 | 0 | return -1; |
337 | 0 | curr = (hash_entry *) *ptr; |
338 | 0 | } |
339 | 0 | curr = curr->next; |
340 | 0 | *ptr = (void *) curr; |
341 | |
|
342 | 0 | *key = curr->key; |
343 | 0 | *keylen = curr->keylen; |
344 | 0 | *datap = &curr->data; |
345 | 0 | return 0; |
346 | 0 | } |