/src/libidn/lib/stringprep.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringprep.c --- Core stringprep implementation. |
2 | | Copyright (C) 2002-2023 Simon Josefsson |
3 | | |
4 | | This file is part of GNU Libidn. |
5 | | |
6 | | GNU Libidn is free software: you can redistribute it and/or |
7 | | modify it under the terms of either: |
8 | | |
9 | | * the GNU Lesser General Public License as published by the Free |
10 | | Software Foundation; either version 3 of the License, or (at |
11 | | your option) any later version. |
12 | | |
13 | | or |
14 | | |
15 | | * the GNU General Public License as published by the Free |
16 | | Software Foundation; either version 2 of the License, or (at |
17 | | your option) any later version. |
18 | | |
19 | | or both in parallel, as here. |
20 | | |
21 | | GNU Libidn is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | | General Public License for more details. |
25 | | |
26 | | You should have received copies of the GNU General Public License and |
27 | | the GNU Lesser General Public License along with this program. If |
28 | | not, see <https://www.gnu.org/licenses/>. */ |
29 | | |
30 | | #ifdef HAVE_CONFIG_H |
31 | | # include "config.h" |
32 | | #endif |
33 | | |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | |
37 | | #include "stringprep.h" |
38 | | |
39 | | static int |
40 | | _compare_table_element (const uint32_t * c, |
41 | | const Stringprep_table_element * e) |
42 | 0 | { |
43 | 0 | if (*c < e->start) |
44 | 0 | return -1; |
45 | 0 | if (*c > e->end) |
46 | 0 | return 1; |
47 | 0 | return 0; |
48 | 0 | } |
49 | | |
50 | | static ssize_t |
51 | | stringprep_find_character_in_table (uint32_t ucs4, |
52 | | const Stringprep_table_element * table, |
53 | | size_t table_size) |
54 | 0 | { |
55 | | /* This is where typical uses of Libidn spends very close to all CPU |
56 | | time and causes most cache misses. One could easily do a binary |
57 | | search instead. Before rewriting this, I want hard evidence this |
58 | | slowness is at all relevant in typical applications. (I don't |
59 | | dispute optimization may improve matters significantly, I'm |
60 | | mostly interested in having someone give real-world benchmark on |
61 | | the impact of libidn.) |
62 | | * |
63 | | * Answer (Tim Rühsen rockdaboot@gmx.de): |
64 | | * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles. |
65 | | * That is almost 20s on my Intel i3 3.1GHz !!! |
66 | | * That even makes fuzzing almost useless, eating up CPU cycles for nothing. |
67 | | * |
68 | | * The bsearch() approach takes ~3 billion CPU cycles. |
69 | | * Almost a factor of 20 faster (but still pretty slow). |
70 | | * There are still ~2 million calls to bsearch() which make ~30% of CPU time used. |
71 | | * Most time is spent in _g_utf8_normalize_wc(). |
72 | | |
73 | | ssize_t i; |
74 | | |
75 | | for (i = 0; table[i].start || table[i].end; i++) |
76 | | if (ucs4 >= table[i].start && |
77 | | ucs4 <= (table[i].end ? table[i].end : table[i].start)) |
78 | | return i; |
79 | | */ |
80 | |
|
81 | 0 | const Stringprep_table_element *p = |
82 | 0 | bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element), |
83 | 0 | (int (*)(const void *, const void *)) _compare_table_element); |
84 | |
|
85 | 0 | return p ? (p - table) : -1; |
86 | 0 | } |
87 | | |
88 | | static ssize_t |
89 | | stringprep_find_string_in_table (uint32_t * ucs4, |
90 | | size_t ucs4len, |
91 | | size_t *tablepos, |
92 | | const Stringprep_table_element * table, |
93 | | size_t table_size) |
94 | 0 | { |
95 | 0 | size_t j; |
96 | 0 | ssize_t pos; |
97 | |
|
98 | 0 | for (j = 0; j < ucs4len; j++) |
99 | 0 | if ((pos = |
100 | 0 | stringprep_find_character_in_table (ucs4[j], table, |
101 | 0 | table_size)) != -1) |
102 | 0 | { |
103 | 0 | if (tablepos) |
104 | 0 | *tablepos = pos; |
105 | 0 | return j; |
106 | 0 | } |
107 | | |
108 | 0 | return -1; |
109 | 0 | } |
110 | | |
111 | | static int |
112 | | stringprep_apply_table_to_string (uint32_t * ucs4, |
113 | | size_t *ucs4len, |
114 | | size_t maxucs4len, |
115 | | const Stringprep_table_element * table, |
116 | | size_t table_size) |
117 | 0 | { |
118 | 0 | ssize_t pos; |
119 | 0 | size_t i, maplen; |
120 | 0 | uint32_t *src = ucs4; /* points to unprocessed data */ |
121 | 0 | size_t srclen = *ucs4len; /* length of unprocessed data */ |
122 | |
|
123 | 0 | while ((pos = stringprep_find_string_in_table (src, srclen, |
124 | 0 | &i, table, |
125 | 0 | table_size)) != -1) |
126 | 0 | { |
127 | 0 | for (maplen = STRINGPREP_MAX_MAP_CHARS; |
128 | 0 | maplen > 0 && table[i].map[maplen - 1] == 0; maplen--) |
129 | 0 | ; |
130 | |
|
131 | 0 | if (*ucs4len - 1 + maplen >= maxucs4len) |
132 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
133 | | |
134 | 0 | memmove (src + pos + maplen, src + pos + 1, |
135 | 0 | sizeof (uint32_t) * (srclen - pos - 1)); |
136 | 0 | memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen); |
137 | 0 | *ucs4len = *ucs4len - 1 + maplen; |
138 | 0 | src += pos + maplen; |
139 | 0 | srclen -= pos + 1; |
140 | 0 | } |
141 | | |
142 | 0 | return STRINGPREP_OK; |
143 | 0 | } |
144 | | |
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long masked off.
   NOTE(review): the name suggests this is meant to recognize
   complemented (~FLAG) values, but the mask also gives a non-zero
   result for plain flag values -- confirm the intent before touching
   the expression; it is kept unchanged here.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when a profile
   step carrying PROFILEFLAGS is to be skipped for a call made with
   FLAGS.  Both arguments are fully parenthesized in the expansion so
   that compound expressions such as 'a | b' can be passed safely;
   note that each argument is still evaluated more than once.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags)                          \
  ((!INVERTED (profileflags) && !((profileflags) & (flags))             \
    && (profileflags))                                                  \
   || (INVERTED (profileflags) && ((profileflags) & (flags))))
149 | | |
150 | | /** |
151 | | * stringprep_4i: |
152 | | * @ucs4: input/output array with string to prepare. |
153 | | * @len: on input, length of input array with Unicode code points, |
154 | | * on exit, length of output array with Unicode code points. |
155 | | * @maxucs4len: maximum length of input/output array. |
156 | | * @flags: a #Stringprep_profile_flags value, or 0. |
157 | | * @profile: pointer to #Stringprep_profile to use. |
158 | | * |
159 | | * Prepare the input UCS-4 string according to the stringprep profile, |
160 | | * and write back the result to the input string. |
161 | | * |
162 | | * The input is not required to be zero terminated (@ucs4[@len] = 0). |
163 | | * The output will not be zero terminated unless @ucs4[@len] = 0. |
164 | | * Instead, see stringprep_4zi() if your input is zero terminated or |
165 | | * if you want the output to be. |
166 | | * |
167 | | * Since the stringprep operation can expand the string, @maxucs4len |
168 | | * indicate how large the buffer holding the string is. This function |
169 | | * will not read or write to code points outside that size. |
170 | | * |
171 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
172 | | * |
173 | | * The @profile contain the #Stringprep_profile instructions to |
174 | | * perform. Your application can define new profiles, possibly |
175 | | * re-using the generic stringprep tables that always will be part of |
176 | | * the library, or use one of the currently supported profiles. |
177 | | * |
178 | | * Return value: Returns %STRINGPREP_OK iff successful, or an |
179 | | * #Stringprep_rc error code. |
180 | | **/ |
181 | | int |
182 | | stringprep_4i (uint32_t * ucs4, size_t *len, size_t maxucs4len, |
183 | | Stringprep_profile_flags flags, |
184 | | const Stringprep_profile * profile) |
185 | 0 | { |
186 | 0 | size_t i, j; |
187 | 0 | ssize_t k; |
188 | 0 | size_t ucs4len = *len; |
189 | 0 | int rc; |
190 | |
|
191 | 0 | for (i = 0; profile[i].operation; i++) |
192 | 0 | { |
193 | 0 | switch (profile[i].operation) |
194 | 0 | { |
195 | 0 | case STRINGPREP_NFKC: |
196 | 0 | { |
197 | 0 | uint32_t *q = 0; |
198 | |
|
199 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
200 | 0 | break; |
201 | | |
202 | 0 | if (flags & STRINGPREP_NO_NFKC && !profile[i].flags) |
203 | | /* Profile requires NFKC, but callee asked for no NFKC. */ |
204 | 0 | return STRINGPREP_FLAG_ERROR; |
205 | | |
206 | 0 | q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len); |
207 | 0 | if (!q) |
208 | 0 | return STRINGPREP_NFKC_FAILED; |
209 | | |
210 | 0 | for (ucs4len = 0; q[ucs4len]; ucs4len++) |
211 | 0 | ; |
212 | |
|
213 | 0 | if (ucs4len >= maxucs4len) |
214 | 0 | { |
215 | 0 | free (q); |
216 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
217 | 0 | } |
218 | | |
219 | 0 | memcpy (ucs4, q, ucs4len * sizeof (ucs4[0])); |
220 | |
|
221 | 0 | free (q); |
222 | 0 | } |
223 | 0 | break; |
224 | | |
225 | 0 | case STRINGPREP_PROHIBIT_TABLE: |
226 | 0 | k = stringprep_find_string_in_table (ucs4, ucs4len, |
227 | 0 | NULL, profile[i].table, |
228 | 0 | profile[i].table_size); |
229 | 0 | if (k != -1) |
230 | 0 | return STRINGPREP_CONTAINS_PROHIBITED; |
231 | 0 | break; |
232 | | |
233 | 0 | case STRINGPREP_UNASSIGNED_TABLE: |
234 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
235 | 0 | break; |
236 | 0 | if (flags & STRINGPREP_NO_UNASSIGNED) |
237 | 0 | { |
238 | 0 | k = stringprep_find_string_in_table |
239 | 0 | (ucs4, ucs4len, NULL, profile[i].table, |
240 | 0 | profile[i].table_size); |
241 | 0 | if (k != -1) |
242 | 0 | return STRINGPREP_CONTAINS_UNASSIGNED; |
243 | 0 | } |
244 | 0 | break; |
245 | | |
246 | 0 | case STRINGPREP_MAP_TABLE: |
247 | 0 | if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) |
248 | 0 | break; |
249 | 0 | rc = stringprep_apply_table_to_string |
250 | 0 | (ucs4, &ucs4len, maxucs4len, profile[i].table, |
251 | 0 | profile[i].table_size); |
252 | 0 | if (rc != STRINGPREP_OK) |
253 | 0 | return rc; |
254 | 0 | break; |
255 | | |
256 | 0 | case STRINGPREP_BIDI_PROHIBIT_TABLE: |
257 | 0 | case STRINGPREP_BIDI_RAL_TABLE: |
258 | 0 | case STRINGPREP_BIDI_L_TABLE: |
259 | 0 | break; |
260 | | |
261 | 0 | case STRINGPREP_BIDI: |
262 | 0 | { |
263 | 0 | int done_prohibited = 0; |
264 | 0 | int done_ral = 0; |
265 | 0 | int done_l = 0; |
266 | 0 | size_t contains_ral = SIZE_MAX; |
267 | 0 | size_t contains_l = SIZE_MAX; |
268 | |
|
269 | 0 | for (j = 0; profile[j].operation; j++) |
270 | 0 | if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE) |
271 | 0 | { |
272 | 0 | done_prohibited = 1; |
273 | 0 | k = stringprep_find_string_in_table (ucs4, ucs4len, |
274 | 0 | NULL, |
275 | 0 | profile[j].table, |
276 | 0 | profile[j].table_size); |
277 | 0 | if (k != -1) |
278 | 0 | return STRINGPREP_BIDI_CONTAINS_PROHIBITED; |
279 | 0 | } |
280 | 0 | else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE) |
281 | 0 | { |
282 | 0 | done_ral = 1; |
283 | 0 | if (stringprep_find_string_in_table |
284 | 0 | (ucs4, ucs4len, NULL, profile[j].table, |
285 | 0 | profile[j].table_size) != -1) |
286 | 0 | contains_ral = j; |
287 | 0 | } |
288 | 0 | else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE) |
289 | 0 | { |
290 | 0 | done_l = 1; |
291 | 0 | if (stringprep_find_string_in_table |
292 | 0 | (ucs4, ucs4len, NULL, profile[j].table, |
293 | 0 | profile[j].table_size) != -1) |
294 | 0 | contains_l = j; |
295 | 0 | } |
296 | | |
297 | 0 | if (!done_prohibited || !done_ral || !done_l) |
298 | 0 | return STRINGPREP_PROFILE_ERROR; |
299 | | |
300 | 0 | if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX) |
301 | 0 | return STRINGPREP_BIDI_BOTH_L_AND_RAL; |
302 | | |
303 | 0 | if (contains_ral != SIZE_MAX) |
304 | 0 | { |
305 | 0 | if (!(stringprep_find_character_in_table |
306 | 0 | (ucs4[0], profile[contains_ral].table, |
307 | 0 | profile[contains_ral].table_size) != -1 |
308 | 0 | && |
309 | 0 | stringprep_find_character_in_table (ucs4[ucs4len - 1], |
310 | 0 | profile |
311 | 0 | [contains_ral].table, |
312 | 0 | profile |
313 | 0 | [contains_ral].table_size) |
314 | 0 | != -1)) |
315 | 0 | return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL; |
316 | 0 | } |
317 | 0 | } |
318 | 0 | break; |
319 | | |
320 | 0 | default: |
321 | 0 | return STRINGPREP_PROFILE_ERROR; |
322 | 0 | break; |
323 | 0 | } |
324 | 0 | } |
325 | | |
326 | 0 | *len = ucs4len; |
327 | |
|
328 | 0 | return STRINGPREP_OK; |
329 | 0 | } |
330 | | |
331 | | static int |
332 | | stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len, |
333 | | Stringprep_profile_flags flags, |
334 | | const Stringprep_profile * profile) |
335 | 0 | { |
336 | 0 | int rc; |
337 | |
|
338 | 0 | rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); |
339 | 0 | if (rc != STRINGPREP_OK) |
340 | 0 | return rc; |
341 | | |
342 | 0 | if (ucs4len >= maxucs4len) |
343 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
344 | | |
345 | 0 | ucs4[ucs4len] = 0; |
346 | |
|
347 | 0 | return STRINGPREP_OK; |
348 | 0 | } |
349 | | |
350 | | /** |
351 | | * stringprep_4zi: |
352 | | * @ucs4: input/output array with zero terminated string to prepare. |
353 | | * @maxucs4len: maximum length of input/output array. |
354 | | * @flags: a #Stringprep_profile_flags value, or 0. |
355 | | * @profile: pointer to #Stringprep_profile to use. |
356 | | * |
357 | | * Prepare the input zero terminated UCS-4 string according to the |
358 | | * stringprep profile, and write back the result to the input string. |
359 | | * |
360 | | * Since the stringprep operation can expand the string, @maxucs4len |
361 | | * indicate how large the buffer holding the string is. This function |
362 | | * will not read or write to code points outside that size. |
363 | | * |
364 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
365 | | * |
366 | | * The @profile contain the #Stringprep_profile instructions to |
367 | | * perform. Your application can define new profiles, possibly |
368 | | * re-using the generic stringprep tables that always will be part of |
369 | | * the library, or use one of the currently supported profiles. |
370 | | * |
371 | | * Return value: Returns %STRINGPREP_OK iff successful, or an |
372 | | * #Stringprep_rc error code. |
373 | | **/ |
374 | | int |
375 | | stringprep_4zi (uint32_t * ucs4, size_t maxucs4len, |
376 | | Stringprep_profile_flags flags, |
377 | | const Stringprep_profile * profile) |
378 | 0 | { |
379 | 0 | size_t ucs4len; |
380 | |
|
381 | 0 | for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++) |
382 | 0 | ; |
383 | |
|
384 | 0 | return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile); |
385 | 0 | } |
386 | | |
387 | | /** |
388 | | * stringprep: |
389 | | * @in: input/ouput array with string to prepare. |
390 | | * @maxlen: maximum length of input/output array. |
391 | | * @flags: a #Stringprep_profile_flags value, or 0. |
392 | | * @profile: pointer to #Stringprep_profile to use. |
393 | | * |
394 | | * Prepare the input zero terminated UTF-8 string according to the |
395 | | * stringprep profile, and write back the result to the input string. |
396 | | * |
397 | | * Note that you must convert strings entered in the systems locale |
398 | | * into UTF-8 before using this function, see |
399 | | * stringprep_locale_to_utf8(). |
400 | | * |
401 | | * Since the stringprep operation can expand the string, @maxlen |
402 | | * indicate how large the buffer holding the string is. This function |
403 | | * will not read or write to characters outside that size. |
404 | | * |
405 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
406 | | * |
407 | | * The @profile contain the #Stringprep_profile instructions to |
408 | | * perform. Your application can define new profiles, possibly |
409 | | * re-using the generic stringprep tables that always will be part of |
410 | | * the library, or use one of the currently supported profiles. |
411 | | * |
412 | | * Return value: Returns %STRINGPREP_OK iff successful, or an error code. |
413 | | **/ |
414 | | int |
415 | | stringprep (char *in, |
416 | | size_t maxlen, |
417 | | Stringprep_profile_flags flags, |
418 | | const Stringprep_profile * profile) |
419 | 0 | { |
420 | 0 | int rc; |
421 | 0 | char *utf8 = NULL; |
422 | 0 | uint32_t *ucs4 = NULL; |
423 | 0 | size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1; |
424 | |
|
425 | 0 | do |
426 | 0 | { |
427 | 0 | uint32_t *newp; |
428 | |
|
429 | 0 | free (ucs4); |
430 | 0 | ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len); |
431 | 0 | if (ucs4 == NULL) |
432 | 0 | return STRINGPREP_ICONV_ERROR; |
433 | 0 | maxucs4len = ucs4len + adducs4len; |
434 | 0 | newp = realloc (ucs4, maxucs4len * sizeof (uint32_t)); |
435 | 0 | if (!newp) |
436 | 0 | { |
437 | 0 | free (ucs4); |
438 | 0 | return STRINGPREP_MALLOC_ERROR; |
439 | 0 | } |
440 | 0 | ucs4 = newp; |
441 | |
|
442 | 0 | rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); |
443 | 0 | adducs4len *= 2; |
444 | 0 | } |
445 | 0 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
446 | 0 | if (rc != STRINGPREP_OK) |
447 | 0 | { |
448 | 0 | free (ucs4); |
449 | 0 | return rc; |
450 | 0 | } |
451 | | |
452 | 0 | utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0); |
453 | 0 | free (ucs4); |
454 | 0 | if (!utf8) |
455 | 0 | return STRINGPREP_ICONV_ERROR; |
456 | | |
457 | 0 | if (strlen (utf8) >= maxlen) |
458 | 0 | { |
459 | 0 | free (utf8); |
460 | 0 | return STRINGPREP_TOO_SMALL_BUFFER; |
461 | 0 | } |
462 | | |
463 | 0 | strcpy (in, utf8); /* flawfinder: ignore */ |
464 | |
|
465 | 0 | free (utf8); |
466 | |
|
467 | 0 | return STRINGPREP_OK; |
468 | 0 | } |
469 | | |
470 | | /** |
471 | | * stringprep_profile: |
472 | | * @in: input array with UTF-8 string to prepare. |
473 | | * @out: output variable with pointer to newly allocate string. |
474 | | * @profile: name of stringprep profile to use. |
475 | | * @flags: a #Stringprep_profile_flags value, or 0. |
476 | | * |
477 | | * Prepare the input zero terminated UTF-8 string according to the |
478 | | * stringprep profile, and return the result in a newly allocated |
479 | | * variable. |
480 | | * |
481 | | * Note that you must convert strings entered in the systems locale |
482 | | * into UTF-8 before using this function, see |
483 | | * stringprep_locale_to_utf8(). |
484 | | * |
485 | | * The output @out variable must be deallocated by the caller. |
486 | | * |
487 | | * The @flags are one of #Stringprep_profile_flags values, or 0. |
488 | | * |
489 | | * The @profile specifies the name of the stringprep profile to use. |
490 | | * It must be one of the internally supported stringprep profiles. |
491 | | * |
492 | | * Return value: Returns %STRINGPREP_OK iff successful, or an error code. |
493 | | **/ |
494 | | int |
495 | | stringprep_profile (const char *in, |
496 | | char **out, |
497 | | const char *profile, Stringprep_profile_flags flags) |
498 | 0 | { |
499 | 0 | const Stringprep_profiles *p; |
500 | 0 | char *str = NULL; |
501 | 0 | size_t len = strlen (in) + 1, addlen = len / 10 + 1; |
502 | 0 | int rc; |
503 | |
|
504 | 0 | for (p = &stringprep_profiles[0]; p->name; p++) |
505 | 0 | if (strcmp (p->name, profile) == 0) |
506 | 0 | break; |
507 | |
|
508 | 0 | if (!p || !p->name || !p->tables) |
509 | 0 | return STRINGPREP_UNKNOWN_PROFILE; |
510 | | |
511 | 0 | do |
512 | 0 | { |
513 | 0 | free (str); |
514 | 0 | str = (char *) malloc (len); |
515 | 0 | if (str == NULL) |
516 | 0 | return STRINGPREP_MALLOC_ERROR; |
517 | | |
518 | 0 | strcpy (str, in); |
519 | |
|
520 | 0 | rc = stringprep (str, len, flags, p->tables); |
521 | 0 | len += addlen; |
522 | 0 | addlen *= 2; |
523 | 0 | } |
524 | 0 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
525 | | |
526 | 0 | if (rc == STRINGPREP_OK) |
527 | 0 | *out = str; |
528 | 0 | else |
529 | 0 | free (str); |
530 | |
|
531 | 0 | return rc; |
532 | 0 | } |
533 | | |
534 | | /*! \mainpage GNU Internationalized Domain Name Library |
535 | | * |
536 | | * \section intro Introduction |
537 | | * |
538 | | * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA |
539 | | * specifications defined by the IETF Internationalized Domain Names |
540 | | * (IDN) working group, used for internationalized domain names. The |
541 | | * package is available under the GNU Lesser General Public License. |
542 | | * |
543 | | * The library contains a generic Stringprep implementation that does |
544 | | * Unicode 3.2 NFKC normalization, mapping and prohibition of |
545 | | * characters, and bidirectional character handling. Profiles for |
546 | | * Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII |
547 | | * Compatible Encoding (ACE) via IDNA are supported. A mechanism to |
548 | | * define Top-Level Domain (TLD) specific validation tables, and to |
549 | | * compare strings against those tables, is included. Default tables |
550 | | * for some TLDs are also included. |
551 | | * |
552 | | * The Stringprep API consists of two main functions, one for |
553 | | * converting data from the system's native representation into UTF-8, |
554 | | * and one function to perform the Stringprep processing. Adding a |
555 | | * new Stringprep profile for your application within the API is |
556 | | * straightforward. The Punycode API consists of one encoding |
557 | | * function and one decoding function. The IDNA API consists of the |
558 | | * ToASCII and ToUnicode functions, as well as a high-level interface |
559 | | * for converting entire domain names to and from the ACE encoded |
560 | | * form. The TLD API consists of one set of functions to extract the |
561 | | * TLD name from a domain string, one set of functions to locate the |
562 | | * proper TLD table to use based on the TLD name, and core functions |
563 | | * to validate a string against a TLD table, and some utility wrappers |
564 | | * to perform all the steps in one call. |
565 | | * |
566 | | * The library is used by, e.g., GNU SASL and Shishi to process user |
567 | | * names and passwords. Libidn can be built into GNU Libc to enable a |
568 | | * new system-wide getaddrinfo() flag for IDN processing. |
569 | | * |
570 | | * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix |
571 | | * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows. |
572 | | * Libidn is written in C and (parts of) the API is accessible from C, |
573 | | * C++, Emacs Lisp, Python and Java. |
574 | | * |
575 | | * The project web page:\n |
576 | | * https://www.gnu.org/software/libidn/ |
577 | | * |
578 | | * The software archive:\n |
579 | | * ftp://alpha.gnu.org/pub/gnu/libidn/ |
580 | | * |
581 | | * For more information see:\n |
582 | | * http://www.ietf.org/html.charters/idn-charter.html\n |
583 | | * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n |
584 | | * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n |
585 | | * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n |
586 | | * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n |
587 | | * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n |
588 | | * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n |
589 | | * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n |
590 | | * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n |
591 | | * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n |
592 | | * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n |
593 | | * |
594 | | * Further information and paid contract development:\n |
595 | | * Simon Josefsson <simon@josefsson.org> |
596 | | * |
597 | | * \section examples Examples |
598 | | * |
599 | | * \include example.c |
600 | | * \include example3.c |
601 | | * \include example4.c |
602 | | * \include example5.c |
603 | | */ |
604 | | |
605 | | /** |
606 | | * STRINGPREP_VERSION |
607 | | * |
608 | | * String defined via CPP denoting the header file version number. |
609 | | * Used together with stringprep_check_version() to verify header file |
610 | | * and run-time library consistency. |
611 | | */ |
612 | | |
613 | | /** |
614 | | * STRINGPREP_MAX_MAP_CHARS |
615 | | * |
616 | | * Maximum number of code points that can replace a single code point, |
617 | | * during stringprep mapping. |
618 | | */ |
619 | | |
620 | | /** |
621 | | * Stringprep_rc: |
622 | | * @STRINGPREP_OK: Successful operation. This value is guaranteed to |
623 | | * always be zero, the remaining ones are only guaranteed to hold |
624 | | * non-zero values, for logical comparison purposes. |
625 | | * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode |
626 | | * code points, which is forbidden by the profile. |
627 | | * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points |
628 | | * prohibited by the profile. |
629 | | * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with |
630 | | * conflicting bidirectional categories. |
631 | | * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character |
632 | | * in string not of proper bidirectional category. |
633 | | * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code |
634 | | * points detected by bidirectional code. |
635 | | * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too |
636 | | * small. This usually indicates a problem in the calling |
637 | | * application. |
638 | | * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent. |
639 | | * This usually indicates an internal error in the library. |
640 | | * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile. |
641 | | * This usually indicates a problem in the calling application. |
642 | | * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not |
643 | | * known to the library. |
644 | | * @STRINGPREP_ICONV_ERROR: Character encoding conversion error. |
645 | | * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed. This |
646 | | * usually indicates an internal error in the library. |
647 | | * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory. This is |
648 | | * usually a fatal error. |
649 | | * |
650 | | * Enumerated return codes of stringprep(), stringprep_profile() |
651 | | * functions (and macros using those functions). The value 0 is |
652 | | * guaranteed to always correspond to success. |
653 | | */ |
654 | | |
655 | | /** |
656 | | * Stringprep_profile_flags: |
657 | | * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as |
658 | | * selecting the non-NFKC case folding tables. Usually the profile |
659 | | * specifies BIDI and NFKC settings, and applications should not |
660 | | * override it unless in special situations. |
661 | | * @STRINGPREP_NO_BIDI: Disable the BIDI step. Usually the profile |
662 | | * specifies BIDI and NFKC settings, and applications should not |
663 | | * override it unless in special situations. |
664 | | * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if |
665 | | * string contains unassigned characters according to profile. |
666 | | * |
667 | | * Stringprep profile flags. |
668 | | */ |
669 | | |
670 | | /** |
671 | | * Stringprep_profile_steps: |
672 | | * @STRINGPREP_NFKC: The NFKC step. |
673 | | * @STRINGPREP_BIDI: The BIDI step. |
674 | | * @STRINGPREP_MAP_TABLE: The MAP step. |
675 | | * @STRINGPREP_UNASSIGNED_TABLE: The Unassigned step. |
676 | | * @STRINGPREP_PROHIBIT_TABLE: The Prohibited step. |
677 | | * @STRINGPREP_BIDI_PROHIBIT_TABLE: The BIDI-Prohibited step. |
678 | | * @STRINGPREP_BIDI_RAL_TABLE: The BIDI-RAL step. |
679 | | * @STRINGPREP_BIDI_L_TABLE: The BIDI-L step. |
680 | | * |
681 | | * Various steps in the stringprep algorithm. You really want to |
682 | | * study the source code to understand this one. Only useful if you |
683 | | * want to add another profile. |
684 | | */ |
685 | | |
686 | | /** |
687 | | * stringprep_nameprep: |
688 | | * @in: input/output array with string to prepare. |
689 | | * @maxlen: maximum length of input/output array. |
690 | | * |
691 | | * Prepare the input UTF-8 string according to the nameprep profile. |
692 | | * The AllowUnassigned flag is true, use |
693 | | * stringprep_nameprep_no_unassigned() if you want a false |
694 | | * AllowUnassigned. Returns 0 iff successful, or an error code. |
695 | | **/ |
696 | | |
697 | | /** |
698 | | * stringprep_nameprep_no_unassigned: |
699 | | * @in: input/output array with string to prepare. |
700 | | * @maxlen: maximum length of input/output array. |
701 | | * |
702 | | * Prepare the input UTF-8 string according to the nameprep profile. |
703 | | * The AllowUnassigned flag is false, use stringprep_nameprep() for |
704 | | * true AllowUnassigned. Returns 0 iff successful, or an error code. |
705 | | **/ |
706 | | |
707 | | /** |
708 | | * stringprep_iscsi: |
709 | | * @in: input/output array with string to prepare. |
710 | | * @maxlen: maximum length of input/output array. |
711 | | * |
712 | | * Prepare the input UTF-8 string according to the draft iSCSI |
713 | | * stringprep profile. Returns 0 iff successful, or an error code. |
714 | | **/ |
715 | | |
716 | | /** |
717 | | * stringprep_plain: |
718 | | * @in: input/output array with string to prepare. |
719 | | * @maxlen: maximum length of input/output array. |
720 | | * |
721 | | * Prepare the input UTF-8 string according to the draft SASL |
722 | | * ANONYMOUS profile. Returns 0 iff successful, or an error code. |
723 | | **/ |
724 | | |
725 | | /** |
726 | | * stringprep_kerberos5: |
727 | | * @in: input/output array with string to prepare. |
728 | | * @maxlen: maximum length of input/output array. |
729 | | * |
730 | | * Prepare the input UTF-8 string according to the draft Kerberos 5 |
731 | | * node identifier profile. Returns 0 iff successful, or an error |
732 | | * code. |
733 | | **/ |
734 | | |
735 | | /** |
736 | | * stringprep_xmpp_nodeprep: |
737 | | * @in: input/output array with string to prepare. |
738 | | * @maxlen: maximum length of input/output array. |
739 | | * |
740 | | * Prepare the input UTF-8 string according to the draft XMPP node |
741 | | * identifier profile. Returns 0 iff successful, or an error code. |
742 | | **/ |
743 | | |
744 | | /** |
745 | | * stringprep_xmpp_resourceprep: |
746 | | * @in: input/output array with string to prepare. |
747 | | * @maxlen: maximum length of input/output array. |
748 | | * |
749 | | * Prepare the input UTF-8 string according to the draft XMPP resource |
750 | | * identifier profile. Returns 0 iff successful, or an error code. |
751 | | **/ |