/src/libidn/lib/stringprep.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringprep.c --- Core stringprep implementation. |
2 | | Copyright (C) 2002-2025 Simon Josefsson |
3 | | |
4 | | This file is part of GNU Libidn. |
5 | | |
6 | | GNU Libidn is free software: you can redistribute it and/or |
7 | | modify it under the terms of either: |
8 | | |
9 | | * the GNU Lesser General Public License as published by the Free |
10 | | Software Foundation; either version 3 of the License, or (at |
11 | | your option) any later version. |
12 | | |
13 | | or |
14 | | |
15 | | * the GNU General Public License as published by the Free |
16 | | Software Foundation; either version 2 of the License, or (at |
17 | | your option) any later version. |
18 | | |
19 | | or both in parallel, as here. |
20 | | |
21 | | GNU Libidn is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | | General Public License for more details. |
25 | | |
26 | | You should have received copies of the GNU General Public License and |
27 | | the GNU Lesser General Public License along with this program. If |
28 | | not, see <https://www.gnu.org/licenses/>. */ |
29 | | |
30 | | #ifdef HAVE_CONFIG_H |
31 | | # include "config.h" |
32 | | #endif |
33 | | |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | |
37 | | #include "stringprep.h" |
38 | | |
39 | | static int |
40 | | _compare_table_element (const uint32_t *c, const Stringprep_table_element *e) |
41 | 0 | { |
42 | 0 | if (*c < e->start) |
43 | 0 | return -1; |
44 | 0 | if (*c > e->end) |
45 | 0 | return 1; |
46 | 0 | return 0; |
47 | 0 | } |
48 | | |
49 | | static ssize_t |
50 | | stringprep_find_character_in_table (uint32_t ucs4, |
51 | | const Stringprep_table_element *table, |
52 | | size_t table_size) |
53 | 0 | { |
54 | | /* This is where typical uses of Libidn spends very close to all CPU |
55 | | time and causes most cache misses. One could easily do a binary |
56 | | search instead. Before rewriting this, I want hard evidence this |
57 | | slowness is at all relevant in typical applications. (I don't |
58 | | dispute optimization may improve matters significantly, I'm |
59 | | mostly interested in having someone give real-world benchmark on |
60 | | the impact of libidn.) |
61 | | * |
62 | | * Answer (Tim Rühsen rockdaboot@gmx.de): |
63 | | * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles. |
64 | | * That is almost 20s on my Intel i3 3.1GHz !!! |
65 | | * That even makes fuzzing almost useless, eating up CPU cycles for nothing. |
66 | | * |
67 | | * The bsearch() approach takes ~3 billion CPU cycles. |
68 | | * Almost a factor of 20 faster (but still pretty slow). |
69 | | * There are still ~2 million calls to bsearch() which make ~30% of CPU time used. |
70 | | * Most time is spent in _g_utf8_normalize_wc(). |
71 | | |
72 | | ssize_t i; |
73 | | |
74 | | for (i = 0; table[i].start || table[i].end; i++) |
75 | | if (ucs4 >= table[i].start && |
76 | | ucs4 <= (table[i].end ? table[i].end : table[i].start)) |
77 | | return i; |
78 | | */ |
79 | |
|
80 | 0 | const Stringprep_table_element *p = |
81 | 0 | bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element), |
82 | 0 | (int (*)(const void *, const void *)) _compare_table_element); |
83 | |
|
84 | 0 | return p ? (p - table) : -1; |
85 | 0 | } |
86 | | |
87 | | static ssize_t |
88 | | stringprep_find_string_in_table (uint32_t *ucs4, |
89 | | size_t ucs4len, |
90 | | size_t *tablepos, |
91 | | const Stringprep_table_element *table, |
92 | | size_t table_size) |
93 | 0 | { |
94 | 0 | size_t j; |
95 | 0 | ssize_t pos; |
96 | |
|
97 | 0 | for (j = 0; j < ucs4len; j++) |
98 | 0 | if ((pos = |
99 | 0 | stringprep_find_character_in_table (ucs4[j], table, |
100 | 0 | table_size)) != -1) |
101 | 0 | { |
102 | 0 | if (tablepos) |
103 | 0 | *tablepos = pos; |
104 | 0 | return j; |
105 | 0 | } |
106 | | |
107 | 0 | return -1; |
108 | 0 | } |
109 | | |
110 | | static int |
111 | | stringprep_apply_table_to_string (uint32_t *ucs4, |
112 | | size_t *ucs4len, |
113 | | size_t maxucs4len, |
114 | | const Stringprep_table_element *table, |
115 | | size_t table_size) |
116 | 0 | { |
117 | 0 | ssize_t pos; |
118 | 0 | size_t i, maplen; |
119 | 0 | uint32_t *src = ucs4; /* points to unprocessed data */ |
120 | 0 | size_t srclen = *ucs4len; /* length of unprocessed data */ |
121 | |
|
122 | 0 | while ((pos = stringprep_find_string_in_table (src, srclen, |
123 | 0 | &i, table, |
124 | 0 | table_size)) != -1) |
125 | 0 | { |
126 | 0 | for (maplen = STRINGPREP_MAX_MAP_CHARS; |
127 | 0 | maplen > 0 && table[i].map[maplen - 1] == 0; maplen--) |
128 | 0 | ; |
129 | |
|
130 | 0 | if (*ucs4len - 1 + maplen >= maxucs4len) |
131 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
132 | | |
133 | 0 | memmove (src + pos + maplen, src + pos + 1, |
134 | 0 | sizeof (uint32_t) * (srclen - pos - 1)); |
135 | 0 | memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen); |
136 | 0 | *ucs4len = *ucs4len - 1 + maplen; |
137 | 0 | src += pos + maplen; |
138 | 0 | srclen -= pos + 1; |
139 | 0 | } |
140 | | |
141 | 0 | return STRINGPREP_OK; |
142 | 0 | } |
143 | | |
/* X with the most significant bit of an unsigned long masked off.
   NOTE(review): despite the name, this is non-zero whenever any bit
   of X below the top bit is set; confirm the intent against the
   profile tables.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* Non-zero when a profile step carrying PROFILEFLAGS is to be skipped
   for a caller that supplied FLAGS.  Every use of an argument is
   parenthesized so that expression arguments (e.g. `a | b') keep
   their meaning after expansion.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags)                          \
  ((!INVERTED (profileflags) && !((profileflags) & (flags))             \
    && (profileflags))                                                  \
   || (INVERTED (profileflags) && ((profileflags) & (flags))))
148 | | |
149 | | /** |
150 | | * stringprep_4i: |
151 | | * @ucs4: input/output array with string to prepare. |
152 | | * @len: on input, length of input array with Unicode code points, |
153 | | * on exit, length of output array with Unicode code points. |
154 | | * @maxucs4len: maximum length of input/output array. |
155 | | * @flags: a #Stringprep_profile_flags value, or 0. |
156 | | * @profile: pointer to #Stringprep_profile to use. |
157 | | * |
158 | | * Prepare the input UCS-4 string according to the stringprep profile, |
159 | | * and write back the result to the input string. |
160 | | * |
161 | | * The input is not required to be zero terminated (@ucs4[@len] = 0). |
162 | | * The output will not be zero terminated unless @ucs4[@len] = 0. |
163 | | * Instead, see stringprep_4zi() if your input is zero terminated or |
164 | | * if you want the output to be. |
165 | | * |
166 | | * Since the stringprep operation can expand the string, @maxucs4len |
167 | | * indicate how large the buffer holding the string is. This function |
168 | | * will not read or write to code points outside that size. |
169 | | * |
170 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
171 | | * |
172 | | * The @profile contain the #Stringprep_profile instructions to |
173 | | * perform. Your application can define new profiles, possibly |
174 | | * reusing the generic stringprep tables that always will be part of |
175 | | * the library, or use one of the currently supported profiles. |
176 | | * |
177 | | * Return value: Returns %STRINGPREP_OK iff successful, or an |
178 | | * #Stringprep_rc error code. |
179 | | **/ |
180 | | int |
181 | | stringprep_4i (uint32_t *ucs4, size_t *len, size_t maxucs4len, |
182 | | Stringprep_profile_flags flags, |
183 | | const Stringprep_profile *profile) |
184 | 0 | { |
185 | 0 | size_t i, j; |
186 | 0 | ssize_t k; |
187 | 0 | size_t ucs4len = *len; |
188 | 0 | int rc; |
189 | |
|
190 | 0 | for (i = 0; profile[i].operation; i++) |
191 | 0 | { |
192 | 0 | switch (profile[i].operation) |
193 | 0 | { |
194 | 0 | case STRINGPREP_NFKC: |
195 | 0 | { |
196 | 0 | uint32_t *q = 0; |
197 | |
|
198 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
199 | 0 | break; |
200 | | |
201 | 0 | if (flags & STRINGPREP_NO_NFKC && !profile[i].flags) |
202 | | /* Profile requires NFKC, but callee asked for no NFKC. */ |
203 | 0 | return STRINGPREP_FLAG_ERROR; |
204 | | |
205 | 0 | q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len); |
206 | 0 | if (!q) |
207 | 0 | return STRINGPREP_NFKC_FAILED; |
208 | | |
209 | 0 | for (ucs4len = 0; q[ucs4len]; ucs4len++) |
210 | 0 | ; |
211 | |
|
212 | 0 | if (ucs4len >= maxucs4len) |
213 | 0 | { |
214 | 0 | free (q); |
215 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
216 | 0 | } |
217 | | |
218 | 0 | memcpy (ucs4, q, ucs4len * sizeof (ucs4[0])); |
219 | |
|
220 | 0 | free (q); |
221 | 0 | } |
222 | 0 | break; |
223 | | |
224 | 0 | case STRINGPREP_PROHIBIT_TABLE: |
225 | 0 | k = stringprep_find_string_in_table (ucs4, ucs4len, |
226 | 0 | NULL, profile[i].table, |
227 | 0 | profile[i].table_size); |
228 | 0 | if (k != -1) |
229 | 0 | return STRINGPREP_CONTAINS_PROHIBITED; |
230 | 0 | break; |
231 | | |
232 | 0 | case STRINGPREP_UNASSIGNED_TABLE: |
233 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
234 | 0 | break; |
235 | 0 | if (flags & STRINGPREP_NO_UNASSIGNED) |
236 | 0 | { |
237 | 0 | k = stringprep_find_string_in_table |
238 | 0 | (ucs4, ucs4len, NULL, profile[i].table, |
239 | 0 | profile[i].table_size); |
240 | 0 | if (k != -1) |
241 | 0 | return STRINGPREP_CONTAINS_UNASSIGNED; |
242 | 0 | } |
243 | 0 | break; |
244 | | |
245 | 0 | case STRINGPREP_MAP_TABLE: |
246 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
247 | 0 | break; |
248 | 0 | rc = stringprep_apply_table_to_string |
249 | 0 | (ucs4, &ucs4len, maxucs4len, profile[i].table, |
250 | 0 | profile[i].table_size); |
251 | 0 | if (rc != STRINGPREP_OK) |
252 | 0 | return rc; |
253 | 0 | break; |
254 | | |
255 | 0 | case STRINGPREP_BIDI_PROHIBIT_TABLE: |
256 | 0 | case STRINGPREP_BIDI_RAL_TABLE: |
257 | 0 | case STRINGPREP_BIDI_L_TABLE: |
258 | 0 | break; |
259 | | |
260 | 0 | case STRINGPREP_BIDI: |
261 | 0 | { |
262 | 0 | int done_prohibited = 0; |
263 | 0 | int done_ral = 0; |
264 | 0 | int done_l = 0; |
265 | 0 | size_t contains_ral = SIZE_MAX; |
266 | 0 | size_t contains_l = SIZE_MAX; |
267 | |
|
268 | 0 | for (j = 0; profile[j].operation; j++) |
269 | 0 | if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE) |
270 | 0 | { |
271 | 0 | done_prohibited = 1; |
272 | 0 | k = stringprep_find_string_in_table (ucs4, ucs4len, |
273 | 0 | NULL, |
274 | 0 | profile[j].table, |
275 | 0 | profile[j].table_size); |
276 | 0 | if (k != -1) |
277 | 0 | return STRINGPREP_BIDI_CONTAINS_PROHIBITED; |
278 | 0 | } |
279 | 0 | else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE) |
280 | 0 | { |
281 | 0 | done_ral = 1; |
282 | 0 | if (stringprep_find_string_in_table |
283 | 0 | (ucs4, ucs4len, NULL, profile[j].table, |
284 | 0 | profile[j].table_size) != -1) |
285 | 0 | contains_ral = j; |
286 | 0 | } |
287 | 0 | else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE) |
288 | 0 | { |
289 | 0 | done_l = 1; |
290 | 0 | if (stringprep_find_string_in_table |
291 | 0 | (ucs4, ucs4len, NULL, profile[j].table, |
292 | 0 | profile[j].table_size) != -1) |
293 | 0 | contains_l = j; |
294 | 0 | } |
295 | | |
296 | 0 | if (!done_prohibited || !done_ral || !done_l) |
297 | 0 | return STRINGPREP_PROFILE_ERROR; |
298 | | |
299 | 0 | if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX) |
300 | 0 | return STRINGPREP_BIDI_BOTH_L_AND_RAL; |
301 | | |
302 | 0 | if (contains_ral != SIZE_MAX) |
303 | 0 | { |
304 | 0 | if (!(stringprep_find_character_in_table |
305 | 0 | (ucs4[0], profile[contains_ral].table, |
306 | 0 | profile[contains_ral].table_size) != -1 |
307 | 0 | && |
308 | 0 | stringprep_find_character_in_table (ucs4[ucs4len - 1], |
309 | 0 | profile |
310 | 0 | [contains_ral].table, |
311 | 0 | profile |
312 | 0 | [contains_ral].table_size) |
313 | 0 | != -1)) |
314 | 0 | return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | break; |
318 | | |
319 | 0 | default: |
320 | 0 | return STRINGPREP_PROFILE_ERROR; |
321 | 0 | break; |
322 | 0 | } |
323 | 0 | } |
324 | | |
325 | 0 | *len = ucs4len; |
326 | |
|
327 | 0 | return STRINGPREP_OK; |
328 | 0 | } |
329 | | |
330 | | static int |
331 | | stringprep_4zi_1 (uint32_t *ucs4, size_t ucs4len, size_t maxucs4len, |
332 | | Stringprep_profile_flags flags, |
333 | | const Stringprep_profile *profile) |
334 | 0 | { |
335 | 0 | int rc; |
336 | |
|
337 | 0 | rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); |
338 | 0 | if (rc != STRINGPREP_OK) |
339 | 0 | return rc; |
340 | | |
341 | 0 | if (ucs4len >= maxucs4len) |
342 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
343 | | |
344 | 0 | ucs4[ucs4len] = 0; |
345 | |
|
346 | 0 | return STRINGPREP_OK; |
347 | 0 | } |
348 | | |
349 | | /** |
350 | | * stringprep_4zi: |
351 | | * @ucs4: input/output array with zero terminated string to prepare. |
352 | | * @maxucs4len: maximum length of input/output array. |
353 | | * @flags: a #Stringprep_profile_flags value, or 0. |
354 | | * @profile: pointer to #Stringprep_profile to use. |
355 | | * |
356 | | * Prepare the input zero terminated UCS-4 string according to the |
357 | | * stringprep profile, and write back the result to the input string. |
358 | | * |
359 | | * Since the stringprep operation can expand the string, @maxucs4len |
360 | | * indicate how large the buffer holding the string is. This function |
361 | | * will not read or write to code points outside that size. |
362 | | * |
363 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
364 | | * |
365 | | * The @profile contain the #Stringprep_profile instructions to |
366 | | * perform. Your application can define new profiles, possibly |
367 | | * reusing the generic stringprep tables that always will be part of |
368 | | * the library, or use one of the currently supported profiles. |
369 | | * |
370 | | * Return value: Returns %STRINGPREP_OK iff successful, or an |
371 | | * #Stringprep_rc error code. |
372 | | **/ |
373 | | int |
374 | | stringprep_4zi (uint32_t *ucs4, size_t maxucs4len, |
375 | | Stringprep_profile_flags flags, |
376 | | const Stringprep_profile *profile) |
377 | 0 | { |
378 | 0 | size_t ucs4len; |
379 | |
|
380 | 0 | for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++) |
381 | 0 | ; |
382 | |
|
383 | 0 | return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile); |
384 | 0 | } |
385 | | |
386 | | /** |
387 | | * stringprep: |
388 | | * @in: input/output array with string to prepare. |
389 | | * @maxlen: maximum length of input/output array. |
390 | | * @flags: a #Stringprep_profile_flags value, or 0. |
391 | | * @profile: pointer to #Stringprep_profile to use. |
392 | | * |
393 | | * Prepare the input zero terminated UTF-8 string according to the |
394 | | * stringprep profile, and write back the result to the input string. |
395 | | * |
396 | | * Note that you must convert strings entered in the systems locale |
397 | | * into UTF-8 before using this function, see |
398 | | * stringprep_locale_to_utf8(). |
399 | | * |
400 | | * Since the stringprep operation can expand the string, @maxlen |
401 | | * indicate how large the buffer holding the string is. This function |
402 | | * will not read or write to characters outside that size. |
403 | | * |
404 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
405 | | * |
406 | | * The @profile contain the #Stringprep_profile instructions to |
407 | | * perform. Your application can define new profiles, possibly |
408 | | * reusing the generic stringprep tables that always will be part of |
409 | | * the library, or use one of the currently supported profiles. |
410 | | * |
411 | | * Return value: Returns %STRINGPREP_OK iff successful, or an error code. |
412 | | **/ |
413 | | int |
414 | | stringprep (char *in, |
415 | | size_t maxlen, |
416 | | Stringprep_profile_flags flags, const Stringprep_profile *profile) |
417 | 0 | { |
418 | 0 | int rc; |
419 | 0 | char *utf8 = NULL; |
420 | 0 | uint32_t *ucs4 = NULL; |
421 | 0 | size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1; |
422 | |
|
423 | 0 | do |
424 | 0 | { |
425 | 0 | uint32_t *newp; |
426 | |
|
427 | 0 | free (ucs4); |
428 | 0 | ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len); |
429 | 0 | if (ucs4 == NULL) |
430 | 0 | return STRINGPREP_ICONV_ERROR; |
431 | 0 | maxucs4len = ucs4len + adducs4len; |
432 | 0 | newp = realloc (ucs4, maxucs4len * sizeof (uint32_t)); |
433 | 0 | if (!newp) |
434 | 0 | { |
435 | 0 | free (ucs4); |
436 | 0 | return STRINGPREP_MALLOC_ERROR; |
437 | 0 | } |
438 | 0 | ucs4 = newp; |
439 | |
|
440 | 0 | rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); |
441 | 0 | adducs4len *= 2; |
442 | 0 | } |
443 | 0 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
444 | 0 | if (rc != STRINGPREP_OK) |
445 | 0 | { |
446 | 0 | free (ucs4); |
447 | 0 | return rc; |
448 | 0 | } |
449 | | |
450 | 0 | utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0); |
451 | 0 | free (ucs4); |
452 | 0 | if (!utf8) |
453 | 0 | return STRINGPREP_ICONV_ERROR; |
454 | | |
455 | 0 | if (strlen (utf8) >= maxlen) |
456 | 0 | { |
457 | 0 | free (utf8); |
458 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
459 | 0 | } |
460 | | |
461 | 0 | strcpy (in, utf8); /* flawfinder: ignore */ |
462 | |
|
463 | 0 | free (utf8); |
464 | |
|
465 | 0 | return STRINGPREP_OK; |
466 | 0 | } |
467 | | |
468 | | /** |
469 | | * stringprep_profile: |
470 | | * @in: input array with UTF-8 string to prepare. |
471 | | * @out: output variable with pointer to newly allocate string. |
472 | | * @profile: name of stringprep profile to use. |
473 | | * @flags: a #Stringprep_profile_flags value, or 0. |
474 | | * |
475 | | * Prepare the input zero terminated UTF-8 string according to the |
476 | | * stringprep profile, and return the result in a newly allocated |
477 | | * variable. |
478 | | * |
479 | | * Note that you must convert strings entered in the systems locale |
480 | | * into UTF-8 before using this function, see |
481 | | * stringprep_locale_to_utf8(). |
482 | | * |
483 | | * The output @out variable must be deallocated by the caller. |
484 | | * |
485 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
486 | | * |
487 | | * The @profile specifies the name of the stringprep profile to use. |
488 | | * It must be one of the internally supported stringprep profiles. |
489 | | * |
490 | | * Return value: Returns %STRINGPREP_OK iff successful, or an error code. |
491 | | **/ |
492 | | int |
493 | | stringprep_profile (const char *in, |
494 | | char **out, |
495 | | const char *profile, Stringprep_profile_flags flags) |
496 | 0 | { |
497 | 0 | const Stringprep_profiles *p; |
498 | 0 | char *str = NULL; |
499 | 0 | size_t len = strlen (in) + 1, addlen = len / 10 + 1; |
500 | 0 | int rc; |
501 | |
|
502 | 0 | for (p = &stringprep_profiles[0]; p->name; p++) |
503 | 0 | if (strcmp (p->name, profile) == 0) |
504 | 0 | break; |
505 | |
|
506 | 0 | if (!p || !p->name || !p->tables) |
507 | 0 | return STRINGPREP_UNKNOWN_PROFILE; |
508 | | |
509 | 0 | do |
510 | 0 | { |
511 | 0 | free (str); |
512 | 0 | str = (char *) malloc (len); |
513 | 0 | if (str == NULL) |
514 | 0 | return STRINGPREP_MALLOC_ERROR; |
515 | | |
516 | 0 | strcpy (str, in); |
517 | |
|
518 | 0 | rc = stringprep (str, len, flags, p->tables); |
519 | 0 | len += addlen; |
520 | 0 | addlen *= 2; |
521 | 0 | } |
522 | 0 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
523 | | |
524 | 0 | if (rc == STRINGPREP_OK) |
525 | 0 | *out = str; |
526 | 0 | else |
527 | 0 | free (str); |
528 | |
|
529 | 0 | return rc; |
530 | 0 | } |
531 | | |
532 | | /*! \mainpage GNU Internationalized Domain Name Library |
533 | | * |
534 | | * \section intro Introduction |
535 | | * |
536 | | * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA |
537 | | * specifications defined by the IETF Internationalized Domain Names |
538 | | * (IDN) working group, used for internationalized domain names. The |
539 | | * package is available under the GNU Lesser General Public License. |
540 | | * |
541 | | * The library contains a generic Stringprep implementation that does |
542 | | * Unicode 3.2 NFKC normalization, mapping and prohibition of |
543 | | * characters, and bidirectional character handling. Profiles for |
544 | | * Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII |
545 | | * Compatible Encoding (ACE) via IDNA are supported. A mechanism to |
546 | | * define Top-Level Domain (TLD) specific validation tables, and to |
547 | | * compare strings against those tables, is included. Default tables |
548 | | * for some TLDs are also included. |
549 | | * |
550 | | * The Stringprep API consists of two main functions, one for |
551 | | * converting data from the system's native representation into UTF-8, |
552 | | * and one function to perform the Stringprep processing. Adding a |
553 | | * new Stringprep profile for your application within the API is |
554 | | * straightforward. The Punycode API consists of one encoding |
555 | | * function and one decoding function. The IDNA API consists of the |
556 | | * ToASCII and ToUnicode functions, as well as a high-level interface |
557 | | * for converting entire domain names to and from the ACE encoded |
558 | | * form. The TLD API consists of one set of functions to extract the |
559 | | * TLD name from a domain string, one set of functions to locate the |
560 | | * proper TLD table to use based on the TLD name, and core functions |
561 | | * to validate a string against a TLD table, and some utility wrappers |
562 | | * to perform all the steps in one call. |
563 | | * |
564 | | * The library is used by, e.g., GNU SASL and Shishi to process user |
565 | | * names and passwords. Libidn can be built into GNU Libc to enable a |
566 | | * new system-wide getaddrinfo() flag for IDN processing. |
567 | | * |
568 | | * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix |
569 | | * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows. |
570 | | * Libidn is written in C and (parts of) the API is accessible from C, |
571 | | * C++, Emacs Lisp, Python and Java. |
572 | | * |
573 | | * The project web page:\n |
574 | | * https://www.gnu.org/software/libidn/ |
575 | | * |
576 | | * The software archive:\n |
577 | | * ftp://alpha.gnu.org/pub/gnu/libidn/ |
578 | | * |
579 | | * For more information see:\n |
580 | | * http://www.ietf.org/html.charters/idn-charter.html\n |
581 | | * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n |
582 | | * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n |
583 | | * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n |
584 | | * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n |
585 | | * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n |
586 | | * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n |
587 | | * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n |
588 | | * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n |
589 | | * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n |
590 | | * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n |
591 | | * |
592 | | * Further information and paid contract development:\n |
593 | | * Simon Josefsson <simon@josefsson.org> |
594 | | * |
595 | | * \section examples Examples |
596 | | * |
597 | | * \include example.c |
598 | | * \include example3.c |
599 | | * \include example4.c |
600 | | * \include example5.c |
601 | | */ |
602 | | |
603 | | /** |
604 | | * STRINGPREP_VERSION |
605 | | * |
606 | | * String defined via CPP denoting the header file version number. |
607 | | * Used together with stringprep_check_version() to verify header file |
608 | | * and run-time library consistency. |
609 | | */ |
610 | | |
611 | | /** |
612 | | * STRINGPREP_MAX_MAP_CHARS |
613 | | * |
614 | | * Maximum number of code points that can replace a single code point, |
615 | | * during stringprep mapping. |
616 | | */ |
617 | | |
618 | | /** |
619 | | * Stringprep_rc: |
620 | | * @STRINGPREP_OK: Successful operation. This value is guaranteed to |
621 | | * always be zero, the remaining ones are only guaranteed to hold |
622 | | * non-zero values, for logical comparison purposes. |
623 | | * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode |
624 | | * code points, which is forbidden by the profile. |
625 | | * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points |
626 | | * prohibited by the profile. |
627 | | * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with |
628 | | * conflicting bidirectional category. |
629 | | * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character |
630 | | * in string not of proper bidirectional category. |
631 | | * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code |
632 | | * points detected by bidirectional code. |
633 | | * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too |
634 | | * small. This usually indicates a problem in the calling |
635 | | * application. |
636 | | * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent. |
637 | | * This usually indicates an internal error in the library. |
638 | | * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile. |
639 | | * This usually indicates a problem in the calling application. |
640 | | * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not |
641 | | * known to the library. |
642 | | * @STRINGPREP_ICONV_ERROR: Character encoding conversion error. |
643 | | * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed. This |
644 | | * usually indicates an internal error in the library. |
645 | | * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory. This is |
646 | | * usually a fatal error. |
647 | | * |
648 | | * Enumerated return codes of stringprep(), stringprep_profile() |
649 | | * functions (and macros using those functions). The value 0 is |
650 | | * guaranteed to always correspond to success. |
651 | | */ |
652 | | |
653 | | /** |
654 | | * Stringprep_profile_flags: |
655 | | * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as |
656 | | * selecting the non-NFKC case folding tables. Usually the profile |
657 | | * specifies BIDI and NFKC settings, and applications should not |
658 | | * override it unless in special situations. |
659 | | * @STRINGPREP_NO_BIDI: Disable the BIDI step. Usually the profile |
660 | | * specifies BIDI and NFKC settings, and applications should not |
661 | | * override it unless in special situations. |
662 | | * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if |
663 | | * string contains unassigned characters according to profile. |
664 | | * |
665 | | * Stringprep profile flags. |
666 | | */ |
667 | | |
668 | | /** |
669 | | * Stringprep_profile_steps: |
670 | | * @STRINGPREP_NFKC: The NFKC step. |
671 | | * @STRINGPREP_BIDI: The BIDI step. |
672 | | * @STRINGPREP_MAP_TABLE: The MAP step. |
673 | | * @STRINGPREP_UNASSIGNED_TABLE: The Unassigned step. |
674 | | * @STRINGPREP_PROHIBIT_TABLE: The Prohibited step. |
675 | | * @STRINGPREP_BIDI_PROHIBIT_TABLE: The BIDI-Prohibited step. |
676 | | * @STRINGPREP_BIDI_RAL_TABLE: The BIDI-RAL step. |
677 | | * @STRINGPREP_BIDI_L_TABLE: The BIDI-L step. |
678 | | * |
679 | | * Various steps in the stringprep algorithm. You really want to |
680 | | * study the source code to understand this one. Only useful if you |
681 | | * want to add another profile. |
682 | | */ |
683 | | |
684 | | /** |
685 | | * stringprep_nameprep: |
686 | | * @in: input/output array with string to prepare. |
687 | | * @maxlen: maximum length of input/output array. |
688 | | * |
689 | | * Prepare the input UTF-8 string according to the nameprep profile. |
690 | | * The AllowUnassigned flag is true, use |
691 | | * stringprep_nameprep_no_unassigned() if you want a false |
692 | | * AllowUnassigned. Returns 0 iff successful, or an error code. |
693 | | **/ |
694 | | |
695 | | /** |
696 | | * stringprep_nameprep_no_unassigned: |
697 | | * @in: input/output array with string to prepare. |
698 | | * @maxlen: maximum length of input/output array. |
699 | | * |
700 | | * Prepare the input UTF-8 string according to the nameprep profile. |
701 | | * The AllowUnassigned flag is false, use stringprep_nameprep() for |
702 | | * true AllowUnassigned. Returns 0 iff successful, or an error code. |
703 | | **/ |
704 | | |
705 | | /** |
706 | | * stringprep_iscsi: |
707 | | * @in: input/output array with string to prepare. |
708 | | * @maxlen: maximum length of input/output array. |
709 | | * |
710 | | * Prepare the input UTF-8 string according to the draft iSCSI |
711 | | * stringprep profile. Returns 0 iff successful, or an error code. |
712 | | **/ |
713 | | |
714 | | /** |
715 | | * stringprep_plain: |
716 | | * @in: input/output array with string to prepare. |
717 | | * @maxlen: maximum length of input/output array. |
718 | | * |
719 | | * Prepare the input UTF-8 string according to the draft SASL |
720 | | * ANONYMOUS profile. Returns 0 iff successful, or an error code. |
721 | | **/ |
722 | | |
723 | | /** |
724 | | * stringprep_kerberos5: |
725 | | * @in: input/output array with string to prepare. |
726 | | * @maxlen: maximum length of input/output array. |
727 | | * |
728 | | * Prepare the input UTF-8 string according to the draft Kerberos 5 |
729 | | * node identifier profile. Returns 0 iff successful, or an error |
730 | | * code. |
731 | | **/ |
732 | | |
733 | | /** |
734 | | * stringprep_xmpp_nodeprep: |
735 | | * @in: input/output array with string to prepare. |
736 | | * @maxlen: maximum length of input/output array. |
737 | | * |
738 | | * Prepare the input UTF-8 string according to the draft XMPP node |
739 | | * identifier profile. Returns 0 iff successful, or an error code. |
740 | | **/ |
741 | | |
742 | | /** |
743 | | * stringprep_xmpp_resourceprep: |
744 | | * @in: input/output array with string to prepare. |
745 | | * @maxlen: maximum length of input/output array. |
746 | | * |
747 | | * Prepare the input UTF-8 string according to the draft XMPP resource |
748 | | * identifier profile. Returns 0 iff successful, or an error code. |
749 | | **/ |