Line | Count | Source (jump to first uncovered line) |
1 | | /* idna.c --- Implementation of the Internationalized Domain Name library. |
2 | | Copyright (C) 2002-2023 Simon Josefsson |
3 | | |
4 | | This file is part of GNU Libidn. |
5 | | |
6 | | GNU Libidn is free software: you can redistribute it and/or |
7 | | modify it under the terms of either: |
8 | | |
9 | | * the GNU Lesser General Public License as published by the Free |
10 | | Software Foundation; either version 3 of the License, or (at |
11 | | your option) any later version. |
12 | | |
13 | | or |
14 | | |
15 | | * the GNU General Public License as published by the Free |
16 | | Software Foundation; either version 2 of the License, or (at |
17 | | your option) any later version. |
18 | | |
19 | | or both in parallel, as here. |
20 | | |
21 | | GNU Libidn is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | | General Public License for more details. |
25 | | |
26 | | You should have received copies of the GNU General Public License and |
27 | | the GNU Lesser General Public License along with this program. If |
28 | | not, see <https://www.gnu.org/licenses/>. */ |
29 | | |
30 | | #ifdef HAVE_CONFIG_H |
31 | | # include "config.h" |
32 | | #endif |
33 | | |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | #include <stringprep.h> |
37 | | #include <punycode.h> |
38 | | |
39 | | #include "idna.h" |
40 | | |
41 | | /* Get c_strcasecmp. */ |
42 | | #include <c-strcase.h> |
43 | | |
/* Nonzero when C is one of the code points that IDNA recognizes as a
   label separator: U+002E (full stop), U+3002 (ideographic full stop),
   U+FF0E (fullwidth full stop) or U+FF61 (halfwidth ideographic full
   stop).  C is evaluated more than once, so it must not have side
   effects.  */
#define DOTP(c)                                 \
  ((c) == 0x002E                                \
   || (c) == 0x3002                             \
   || (c) == 0xFF0E                             \
   || (c) == 0xFF61)
46 | | |
47 | | /* Core functions */ |
48 | | |
49 | | /** |
50 | | * idna_to_ascii_4i: |
51 | | * @in: input array with unicode code points. |
52 | | * @inlen: length of input array with unicode code points. |
53 | | * @out: output zero terminated string that must have room for at |
54 | | * least 63 characters plus the terminating zero. |
55 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
56 | | * %IDNA_USE_STD3_ASCII_RULES. |
57 | | * |
58 | | * The ToASCII operation takes a sequence of Unicode code points that |
59 | | * make up one domain label and transforms it into a sequence of code |
60 | | * points in the ASCII range (0..7F). If ToASCII succeeds, the |
61 | | * original sequence and the resulting sequence are equivalent labels. |
62 | | * |
63 | | * It is important to note that the ToASCII operation can fail. ToASCII |
64 | | * fails if any step of it fails. If any step of the ToASCII operation |
65 | | * fails on any label in a domain name, that domain name MUST NOT be used |
66 | | * as an internationalized domain name. The method for deadling with this |
67 | | * failure is application-specific. |
68 | | * |
69 | | * The inputs to ToASCII are a sequence of code points, the AllowUnassigned |
70 | | * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a |
71 | | * sequence of ASCII code points or a failure condition. |
72 | | * |
73 | | * ToASCII never alters a sequence of code points that are all in the ASCII |
74 | | * range to begin with (although it could fail). Applying the ToASCII |
75 | | * operation multiple times has exactly the same effect as applying it just |
76 | | * once. |
77 | | * |
78 | | * Return value: Returns 0 on success, or an #Idna_rc error code. |
79 | | */ |
80 | | int |
81 | | idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) |
82 | 12.5k | { |
83 | 12.5k | size_t len, outlen; |
84 | 12.5k | uint32_t *src; /* XXX don't need to copy data? */ |
85 | 12.5k | int rc; |
86 | | |
87 | | /* |
88 | | * ToASCII consists of the following steps: |
89 | | * |
90 | | * 1. If all code points in the sequence are in the ASCII range (0..7F) |
91 | | * then skip to step 3. |
92 | | */ |
93 | | |
94 | 12.5k | { |
95 | 12.5k | size_t i; |
96 | 12.5k | int inasciirange; |
97 | | |
98 | 12.5k | inasciirange = 1; |
99 | 308k | for (i = 0; i < inlen; i++) |
100 | 295k | if (in[i] > 0x7F) |
101 | 246k | inasciirange = 0; |
102 | 12.5k | if (inasciirange) |
103 | 4.74k | { |
104 | 4.74k | src = malloc (sizeof (in[0]) * (inlen + 1)); |
105 | 4.74k | if (src == NULL) |
106 | 0 | return IDNA_MALLOC_ERROR; |
107 | | |
108 | 4.74k | memcpy (src, in, sizeof (in[0]) * inlen); |
109 | 4.74k | src[inlen] = 0; |
110 | | |
111 | 4.74k | goto step3; |
112 | 4.74k | } |
113 | 12.5k | } |
114 | | |
115 | | /* |
116 | | * 2. Perform the steps specified in [NAMEPREP] and fail if there is |
117 | | * an error. The AllowUnassigned flag is used in [NAMEPREP]. |
118 | | */ |
119 | | |
120 | 7.78k | { |
121 | 7.78k | char *p; |
122 | | |
123 | 7.78k | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL); |
124 | 7.78k | if (p == NULL) |
125 | 0 | return IDNA_MALLOC_ERROR; |
126 | | |
127 | 7.78k | len = strlen (p); |
128 | 7.78k | do |
129 | 11.1k | { |
130 | 11.1k | char *newp; |
131 | | |
132 | 11.1k | len = 2 * len + 10; /* XXX better guess? */ |
133 | 11.1k | newp = realloc (p, len); |
134 | 11.1k | if (newp == NULL) |
135 | 0 | { |
136 | 0 | free (p); |
137 | 0 | return IDNA_MALLOC_ERROR; |
138 | 0 | } |
139 | 11.1k | p = newp; |
140 | | |
141 | 11.1k | if (flags & IDNA_ALLOW_UNASSIGNED) |
142 | 5.74k | rc = stringprep_nameprep (p, len); |
143 | 5.42k | else |
144 | 5.42k | rc = stringprep_nameprep_no_unassigned (p, len); |
145 | 11.1k | } |
146 | 11.1k | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
147 | | |
148 | 7.78k | if (rc != STRINGPREP_OK) |
149 | 2.28k | { |
150 | 2.28k | free (p); |
151 | 2.28k | return IDNA_STRINGPREP_ERROR; |
152 | 2.28k | } |
153 | | |
154 | 5.49k | src = stringprep_utf8_to_ucs4 (p, -1, NULL); |
155 | | |
156 | 5.49k | free (p); |
157 | | |
158 | 5.49k | if (!src) |
159 | 0 | return IDNA_MALLOC_ERROR; |
160 | 5.49k | } |
161 | | |
162 | 10.2k | step3: |
163 | | /* |
164 | | * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: |
165 | | * |
166 | | * (a) Verify the absence of non-LDH ASCII code points; that is, |
167 | | * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. |
168 | | * |
169 | | * (b) Verify the absence of leading and trailing hyphen-minus; |
170 | | * that is, the absence of U+002D at the beginning and end of |
171 | | * the sequence. |
172 | | */ |
173 | | |
174 | 10.2k | if (flags & IDNA_USE_STD3_ASCII_RULES) |
175 | 5.45k | { |
176 | 5.45k | size_t i; |
177 | | |
178 | 105k | for (i = 0; src[i]; i++) |
179 | 102k | if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || |
180 | 102k | (src[i] >= 0x3A && src[i] <= 0x40) || |
181 | 102k | (src[i] >= 0x5B && src[i] <= 0x60) || |
182 | 102k | (src[i] >= 0x7B && src[i] <= 0x7F)) |
183 | 2.31k | { |
184 | 2.31k | free (src); |
185 | 2.31k | return IDNA_CONTAINS_NON_LDH; |
186 | 2.31k | } |
187 | | |
188 | 3.14k | if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) |
189 | 465 | { |
190 | 465 | free (src); |
191 | 465 | return IDNA_CONTAINS_MINUS; |
192 | 465 | } |
193 | 3.14k | } |
194 | | |
195 | | /* |
196 | | * 4. If all code points in the sequence are in the ASCII range |
197 | | * (0..7F), then skip to step 8. |
198 | | */ |
199 | | |
200 | 7.46k | { |
201 | 7.46k | size_t i; |
202 | 7.46k | int inasciirange; |
203 | | |
204 | 7.46k | inasciirange = 1; |
205 | 931k | for (i = 0; src[i]; i++) |
206 | 924k | { |
207 | 924k | if (src[i] > 0x7F) |
208 | 768k | inasciirange = 0; |
209 | | /* copy string to output buffer if we are about to skip to step8 */ |
210 | 924k | if (i < 64) |
211 | 101k | out[i] = src[i]; |
212 | 924k | } |
213 | 7.46k | if (i < 64) |
214 | 6.90k | out[i] = '\0'; |
215 | 560 | else |
216 | 560 | { |
217 | 560 | free (src); |
218 | 560 | return IDNA_INVALID_LENGTH; |
219 | 560 | } |
220 | 6.90k | if (inasciirange) |
221 | 3.50k | goto step8; |
222 | 6.90k | } |
223 | | |
224 | | /* |
225 | | * 5. Verify that the sequence does NOT begin with the ACE prefix. |
226 | | * |
227 | | */ |
228 | | |
229 | 3.40k | { |
230 | 3.40k | size_t i; |
231 | 3.40k | int match; |
232 | | |
233 | 3.40k | match = 1; |
234 | 7.71k | for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) |
235 | 4.31k | if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) |
236 | 3.13k | match = 0; |
237 | 3.40k | if (match) |
238 | 262 | { |
239 | 262 | free (src); |
240 | 262 | return IDNA_CONTAINS_ACE_PREFIX; |
241 | 262 | } |
242 | 3.40k | } |
243 | | |
244 | | /* |
245 | | * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] |
246 | | * and fail if there is an error. |
247 | | */ |
248 | 62.3k | for (len = 0; src[len]; len++) |
249 | 59.1k | ; |
250 | 3.13k | src[len] = '\0'; |
251 | 3.13k | outlen = 63 - strlen (IDNA_ACE_PREFIX); |
252 | 3.13k | rc = punycode_encode (len, src, NULL, |
253 | 3.13k | &outlen, &out[strlen (IDNA_ACE_PREFIX)]); |
254 | 3.13k | if (rc != PUNYCODE_SUCCESS) |
255 | 655 | { |
256 | 655 | free (src); |
257 | 655 | return IDNA_PUNYCODE_ERROR; |
258 | 655 | } |
259 | 2.48k | out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; |
260 | | |
261 | | /* |
262 | | * 7. Prepend the ACE prefix. |
263 | | */ |
264 | | |
265 | 2.48k | memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); |
266 | | |
267 | | /* |
268 | | * 8. Verify that the number of code points is in the range 1 to 63 |
269 | | * inclusive (0 is excluded). |
270 | | */ |
271 | | |
272 | 5.99k | step8: |
273 | 5.99k | free (src); |
274 | 5.99k | if (strlen (out) < 1) |
275 | 802 | return IDNA_INVALID_LENGTH; |
276 | | |
277 | 5.18k | return IDNA_SUCCESS; |
278 | 5.99k | } |
279 | | |
280 | | /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */ |
281 | | static int |
282 | | idna_to_unicode_internal (char *utf8in, |
283 | | uint32_t * out, size_t *outlen, int flags) |
284 | 35.2k | { |
285 | 35.2k | int rc; |
286 | 35.2k | char tmpout[64]; |
287 | 35.2k | size_t utf8len = strlen (utf8in) + 1; |
288 | 35.2k | size_t addlen = 0, addinc = utf8len / 10 + 1; |
289 | | |
290 | | /* |
291 | | * ToUnicode consists of the following steps: |
292 | | * |
293 | | * 1. If the sequence contains any code points outside the ASCII range |
294 | | * (0..7F) then proceed to step 2, otherwise skip to step 3. |
295 | | */ |
296 | | |
297 | 35.2k | { |
298 | 35.2k | size_t i; |
299 | 35.2k | int inasciirange; |
300 | | |
301 | 35.2k | inasciirange = 1; |
302 | 695k | for (i = 0; utf8in[i]; i++) |
303 | 660k | if (utf8in[i] & ~0x7F) |
304 | 214k | inasciirange = 0; |
305 | 35.2k | if (inasciirange) |
306 | 21.2k | goto step3; |
307 | 35.2k | } |
308 | | |
309 | | /* |
310 | | * 2. Perform the steps specified in [NAMEPREP] and fail if there is an |
311 | | * error. (If step 3 of ToASCII is also performed here, it will not |
312 | | * affect the overall behavior of ToUnicode, but it is not |
313 | | * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. |
314 | | */ |
315 | 14.0k | do |
316 | 26.2k | { |
317 | 26.2k | char *newp = realloc (utf8in, utf8len + addlen); |
318 | 26.2k | if (newp == NULL) |
319 | 0 | { |
320 | 0 | free (utf8in); |
321 | 0 | return IDNA_MALLOC_ERROR; |
322 | 0 | } |
323 | 26.2k | utf8in = newp; |
324 | 26.2k | if (flags & IDNA_ALLOW_UNASSIGNED) |
325 | 13.8k | rc = stringprep_nameprep (utf8in, utf8len + addlen); |
326 | 12.3k | else |
327 | 12.3k | rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen); |
328 | 26.2k | addlen += addinc; |
329 | 26.2k | addinc *= 2; |
330 | 26.2k | } |
331 | 26.2k | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
332 | | |
333 | 14.0k | if (rc != STRINGPREP_OK) |
334 | 6.27k | { |
335 | 6.27k | free (utf8in); |
336 | 6.27k | return IDNA_STRINGPREP_ERROR; |
337 | 6.27k | } |
338 | | |
339 | | /* 3. Verify that the sequence begins with the ACE prefix, and save a |
340 | | * copy of the sequence. |
341 | | * ... The ToASCII and ToUnicode operations MUST recognize the ACE |
342 | | prefix in a case-insensitive manner. |
343 | | */ |
344 | | |
345 | 29.0k | step3: |
346 | 29.0k | if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0) |
347 | 12.8k | { |
348 | 12.8k | free (utf8in); |
349 | 12.8k | return IDNA_NO_ACE_PREFIX; |
350 | 12.8k | } |
351 | | |
352 | | /* 4. Remove the ACE prefix. |
353 | | */ |
354 | | |
355 | 16.2k | memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)], |
356 | 16.2k | strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1); |
357 | | |
358 | | /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] |
359 | | * and fail if there is an error. Save a copy of the result of |
360 | | * this step. |
361 | | */ |
362 | | |
363 | 16.2k | (*outlen)--; /* reserve one for the zero */ |
364 | | |
365 | 16.2k | rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); |
366 | 16.2k | if (rc != PUNYCODE_SUCCESS) |
367 | 3.67k | { |
368 | 3.67k | free (utf8in); |
369 | 3.67k | return IDNA_PUNYCODE_ERROR; |
370 | 3.67k | } |
371 | | |
372 | 12.5k | out[*outlen] = 0; /* add zero */ |
373 | | |
374 | | /* 6. Apply ToASCII. |
375 | | */ |
376 | | |
377 | 12.5k | rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); |
378 | 12.5k | if (rc != IDNA_SUCCESS) |
379 | 7.33k | { |
380 | 7.33k | free (utf8in); |
381 | 7.33k | return rc; |
382 | 7.33k | } |
383 | | |
384 | | /* 7. Verify that the result of step 6 matches the saved copy from |
385 | | * step 3, using a case-insensitive ASCII comparison. |
386 | | */ |
387 | | |
388 | 5.18k | if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) |
389 | 4.38k | { |
390 | 4.38k | free (utf8in); |
391 | 4.38k | return IDNA_ROUNDTRIP_VERIFY_ERROR; |
392 | 4.38k | } |
393 | | |
394 | | /* 8. Return the saved copy from step 5. |
395 | | */ |
396 | | |
397 | 806 | free (utf8in); |
398 | 806 | return IDNA_SUCCESS; |
399 | 5.18k | } |
400 | | |
401 | | /** |
402 | | * idna_to_unicode_44i: |
403 | | * @in: input array with unicode code points. |
404 | | * @inlen: length of input array with unicode code points. |
405 | | * @out: output array with unicode code points. |
406 | | * @outlen: on input, maximum size of output array with unicode code points, |
407 | | * on exit, actual size of output array with unicode code points. |
408 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
409 | | * %IDNA_USE_STD3_ASCII_RULES. |
410 | | * |
411 | | * The ToUnicode operation takes a sequence of Unicode code points |
412 | | * that make up one domain label and returns a sequence of Unicode |
413 | | * code points. If the input sequence is a label in ACE form, then the |
414 | | * result is an equivalent internationalized label that is not in ACE |
415 | | * form, otherwise the original sequence is returned unaltered. |
416 | | * |
417 | | * ToUnicode never fails. If any step fails, then the original input |
418 | | * sequence is returned immediately in that step. |
419 | | * |
420 | | * The Punycode decoder can never output more code points than it |
421 | | * inputs, but Nameprep can, and therefore ToUnicode can. Note that |
422 | | * the number of octets needed to represent a sequence of code points |
423 | | * depends on the particular character encoding used. |
424 | | * |
425 | | * The inputs to ToUnicode are a sequence of code points, the |
426 | | * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of |
427 | | * ToUnicode is always a sequence of Unicode code points. |
428 | | * |
429 | | * Return value: Returns #Idna_rc error condition, but it must only be |
430 | | * used for debugging purposes. The output buffer is always |
431 | | * guaranteed to contain the correct data according to the |
432 | | * specification (sans malloc induced errors). NB! This means that |
433 | | * you normally ignore the return code from this function, as |
434 | | * checking it means breaking the standard. |
435 | | */ |
436 | | int |
437 | | idna_to_unicode_44i (const uint32_t * in, size_t inlen, |
438 | | uint32_t * out, size_t *outlen, int flags) |
439 | 37.6k | { |
440 | 37.6k | int rc; |
441 | 37.6k | size_t outlensave = *outlen; |
442 | 37.6k | char *p; |
443 | | |
444 | 37.6k | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL); |
445 | 37.6k | if (p == NULL) |
446 | 2.34k | return IDNA_MALLOC_ERROR; |
447 | | |
448 | 35.2k | rc = idna_to_unicode_internal (p, out, outlen, flags); |
449 | 35.2k | if (rc != IDNA_SUCCESS) |
450 | 34.4k | { |
451 | 34.4k | memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? |
452 | 34.4k | inlen : outlensave)); |
453 | 34.4k | *outlen = inlen; |
454 | 34.4k | } |
455 | | |
456 | | /* p is freed in idna_to_unicode_internal. */ |
457 | | |
458 | 35.2k | return rc; |
459 | 37.6k | } |
460 | | |
461 | | /* Wrappers that handle several labels */ |
462 | | |
463 | | /** |
464 | | * idna_to_ascii_4z: |
465 | | * @input: zero terminated input Unicode string. |
466 | | * @output: pointer to newly allocated output string. |
467 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
468 | | * %IDNA_USE_STD3_ASCII_RULES. |
469 | | * |
470 | | * Convert UCS-4 domain name to ASCII string. The domain name may |
471 | | * contain several labels, separated by dots. The output buffer must |
472 | | * be deallocated by the caller. |
473 | | * |
474 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
475 | | **/ |
476 | | int |
477 | | idna_to_ascii_4z (const uint32_t * input, char **output, int flags) |
478 | 0 | { |
479 | 0 | const uint32_t *start = input; |
480 | 0 | const uint32_t *end; |
481 | 0 | char buf[64]; |
482 | 0 | char *out = NULL; |
483 | 0 | int rc; |
484 | | |
485 | | /* 1) Whenever dots are used as label separators, the following |
486 | | characters MUST be recognized as dots: U+002E (full stop), |
487 | | U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), |
488 | | U+FF61 (halfwidth ideographic full stop). */ |
489 | |
|
490 | 0 | if (input[0] == 0) |
491 | 0 | { |
492 | | /* Handle implicit zero-length root label. */ |
493 | 0 | *output = malloc (1); |
494 | 0 | if (!*output) |
495 | 0 | return IDNA_MALLOC_ERROR; |
496 | 0 | strcpy (*output, ""); |
497 | 0 | return IDNA_SUCCESS; |
498 | 0 | } |
499 | | |
500 | 0 | if (DOTP (input[0]) && input[1] == 0) |
501 | 0 | { |
502 | | /* Handle explicit zero-length root label. */ |
503 | 0 | *output = malloc (2); |
504 | 0 | if (!*output) |
505 | 0 | return IDNA_MALLOC_ERROR; |
506 | 0 | strcpy (*output, "."); |
507 | 0 | return IDNA_SUCCESS; |
508 | 0 | } |
509 | | |
510 | 0 | *output = NULL; |
511 | 0 | do |
512 | 0 | { |
513 | 0 | end = start; |
514 | |
|
515 | 0 | for (; *end && !DOTP (*end); end++) |
516 | 0 | ; |
517 | |
|
518 | 0 | if (*end == '\0' && start == end) |
519 | 0 | { |
520 | | /* Handle explicit zero-length root label. */ |
521 | 0 | buf[0] = '\0'; |
522 | 0 | } |
523 | 0 | else |
524 | 0 | { |
525 | 0 | rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags); |
526 | 0 | if (rc != IDNA_SUCCESS) |
527 | 0 | { |
528 | 0 | free (out); |
529 | 0 | return rc; |
530 | 0 | } |
531 | 0 | } |
532 | | |
533 | 0 | if (out) |
534 | 0 | { |
535 | 0 | size_t l = strlen (out) + 1 + strlen (buf) + 1; |
536 | 0 | char *newp = realloc (out, l); |
537 | 0 | if (!newp) |
538 | 0 | { |
539 | 0 | free (out); |
540 | 0 | return IDNA_MALLOC_ERROR; |
541 | 0 | } |
542 | 0 | out = newp; |
543 | 0 | strcat (out, "."); |
544 | 0 | strcat (out, buf); |
545 | 0 | } |
546 | 0 | else |
547 | 0 | { |
548 | 0 | out = strdup (buf); |
549 | 0 | if (!out) |
550 | 0 | return IDNA_MALLOC_ERROR; |
551 | 0 | } |
552 | | |
553 | 0 | start = end + 1; |
554 | 0 | } |
555 | 0 | while (*end); |
556 | | |
557 | 0 | *output = out; |
558 | |
|
559 | 0 | return IDNA_SUCCESS; |
560 | 0 | } |
561 | | |
562 | | /** |
563 | | * idna_to_ascii_8z: |
564 | | * @input: zero terminated input UTF-8 string. |
565 | | * @output: pointer to newly allocated output string. |
566 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
567 | | * %IDNA_USE_STD3_ASCII_RULES. |
568 | | * |
569 | | * Convert UTF-8 domain name to ASCII string. The domain name may |
570 | | * contain several labels, separated by dots. The output buffer must |
571 | | * be deallocated by the caller. |
572 | | * |
573 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
574 | | **/ |
575 | | int |
576 | | idna_to_ascii_8z (const char *input, char **output, int flags) |
577 | 0 | { |
578 | 0 | uint32_t *ucs4; |
579 | 0 | size_t ucs4len; |
580 | 0 | int rc; |
581 | |
|
582 | 0 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); |
583 | 0 | if (!ucs4) |
584 | 0 | return IDNA_ICONV_ERROR; |
585 | | |
586 | 0 | rc = idna_to_ascii_4z (ucs4, output, flags); |
587 | |
|
588 | 0 | free (ucs4); |
589 | |
|
590 | 0 | return rc; |
591 | |
|
592 | 0 | } |
593 | | |
594 | | /** |
595 | | * idna_to_ascii_lz: |
596 | | * @input: zero terminated input string encoded in the current locale's |
597 | | * character set. |
598 | | * @output: pointer to newly allocated output string. |
599 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
600 | | * %IDNA_USE_STD3_ASCII_RULES. |
601 | | * |
602 | | * Convert domain name in the locale's encoding to ASCII string. The |
603 | | * domain name may contain several labels, separated by dots. The |
604 | | * output buffer must be deallocated by the caller. |
605 | | * |
606 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
607 | | **/ |
608 | | int |
609 | | idna_to_ascii_lz (const char *input, char **output, int flags) |
610 | 0 | { |
611 | 0 | char *utf8; |
612 | 0 | int rc; |
613 | |
|
614 | 0 | utf8 = stringprep_locale_to_utf8 (input); |
615 | 0 | if (!utf8) |
616 | 0 | return IDNA_ICONV_ERROR; |
617 | | |
618 | 0 | rc = idna_to_ascii_8z (utf8, output, flags); |
619 | |
|
620 | 0 | free (utf8); |
621 | |
|
622 | 0 | return rc; |
623 | 0 | } |
624 | | |
625 | | /** |
626 | | * idna_to_unicode_4z4z: |
627 | | * @input: zero-terminated Unicode string. |
628 | | * @output: pointer to newly allocated output Unicode string. |
629 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
630 | | * %IDNA_USE_STD3_ASCII_RULES. |
631 | | * |
632 | | * Convert possibly ACE encoded domain name in UCS-4 format into a |
633 | | * UCS-4 string. The domain name may contain several labels, |
634 | | * separated by dots. The output buffer must be deallocated by the |
635 | | * caller. |
636 | | * |
637 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
638 | | **/ |
639 | | int |
640 | | idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) |
641 | 15.4k | { |
642 | 15.4k | const uint32_t *start = input; |
643 | 15.4k | const uint32_t *end; |
644 | 15.4k | uint32_t *buf; |
645 | 15.4k | size_t buflen; |
646 | 15.4k | uint32_t *out = NULL; |
647 | 15.4k | size_t outlen = 0; |
648 | | |
649 | 15.4k | *output = NULL; |
650 | | |
651 | 15.4k | do |
652 | 35.1k | { |
653 | 35.1k | end = start; |
654 | | |
655 | 521k | for (; *end && !DOTP (*end); end++) |
656 | 486k | ; |
657 | | |
658 | 35.1k | buflen = (size_t) (end - start); |
659 | 35.1k | buf = malloc (sizeof (buf[0]) * (buflen + 1)); |
660 | 35.1k | if (!buf) |
661 | 0 | { |
662 | 0 | free (out); |
663 | 0 | return IDNA_MALLOC_ERROR; |
664 | 0 | } |
665 | | |
666 | | /* don't check return code as per specification! */ |
667 | 35.1k | idna_to_unicode_44i (start, (size_t) (end - start), |
668 | 35.1k | buf, &buflen, flags); |
669 | | |
670 | 35.1k | if (out) |
671 | 19.6k | { |
672 | 19.6k | uint32_t *newp = realloc (out, |
673 | 19.6k | sizeof (out[0]) |
674 | 19.6k | * (outlen + 1 + buflen + 1)); |
675 | 19.6k | if (!newp) |
676 | 0 | { |
677 | 0 | free (buf); |
678 | 0 | free (out); |
679 | 0 | return IDNA_MALLOC_ERROR; |
680 | 0 | } |
681 | 19.6k | out = newp; |
682 | 19.6k | out[outlen++] = 0x002E; /* '.' (full stop) */ |
683 | 19.6k | memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); |
684 | 19.6k | outlen += buflen; |
685 | 19.6k | out[outlen] = 0x0; |
686 | 19.6k | free (buf); |
687 | 19.6k | } |
688 | 15.4k | else |
689 | 15.4k | { |
690 | 15.4k | out = buf; |
691 | 15.4k | outlen = buflen; |
692 | 15.4k | out[outlen] = 0x0; |
693 | 15.4k | } |
694 | | |
695 | 35.1k | start = end + 1; |
696 | 35.1k | } |
697 | 35.1k | while (*end); |
698 | | |
699 | 15.4k | *output = out; |
700 | | |
701 | 15.4k | return IDNA_SUCCESS; |
702 | 15.4k | } |
703 | | |
704 | | /** |
705 | | * idna_to_unicode_8z4z: |
706 | | * @input: zero-terminated UTF-8 string. |
707 | | * @output: pointer to newly allocated output Unicode string. |
708 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
709 | | * %IDNA_USE_STD3_ASCII_RULES. |
710 | | * |
711 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
712 | | * UCS-4 string. The domain name may contain several labels, |
713 | | * separated by dots. The output buffer must be deallocated by the |
714 | | * caller. |
715 | | * |
716 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
717 | | **/ |
718 | | int |
719 | | idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) |
720 | 14.9k | { |
721 | 14.9k | uint32_t *ucs4; |
722 | 14.9k | size_t ucs4len; |
723 | 14.9k | int rc; |
724 | | |
725 | 14.9k | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); |
726 | 14.9k | if (!ucs4) |
727 | 1.97k | return IDNA_ICONV_ERROR; |
728 | | |
729 | 12.9k | rc = idna_to_unicode_4z4z (ucs4, output, flags); |
730 | 12.9k | free (ucs4); |
731 | | |
732 | 12.9k | return rc; |
733 | 14.9k | } |
734 | | |
735 | | /** |
736 | | * idna_to_unicode_8z8z: |
737 | | * @input: zero-terminated UTF-8 string. |
738 | | * @output: pointer to newly allocated output UTF-8 string. |
739 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
740 | | * %IDNA_USE_STD3_ASCII_RULES. |
741 | | * |
742 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
743 | | * UTF-8 string. The domain name may contain several labels, |
744 | | * separated by dots. The output buffer must be deallocated by the |
745 | | * caller. |
746 | | * |
747 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
748 | | **/ |
749 | | int |
750 | | idna_to_unicode_8z8z (const char *input, char **output, int flags) |
751 | 12.4k | { |
752 | 12.4k | uint32_t *ucs4; |
753 | 12.4k | int rc; |
754 | | |
755 | 12.4k | rc = idna_to_unicode_8z4z (input, &ucs4, flags); |
756 | 12.4k | if (rc != IDNA_SUCCESS) |
757 | 1.37k | return rc; |
758 | | |
759 | 11.0k | *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL); |
760 | 11.0k | free (ucs4); |
761 | | |
762 | 11.0k | if (!*output) |
763 | 0 | return IDNA_ICONV_ERROR; |
764 | | |
765 | 11.0k | return IDNA_SUCCESS; |
766 | 11.0k | } |
767 | | |
768 | | /** |
769 | | * idna_to_unicode_8zlz: |
770 | | * @input: zero-terminated UTF-8 string. |
771 | | * @output: pointer to newly allocated output string encoded in the |
772 | | * current locale's character set. |
773 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
774 | | * %IDNA_USE_STD3_ASCII_RULES. |
775 | | * |
776 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
777 | | * string encoded in the current locale's character set. The domain |
778 | | * name may contain several labels, separated by dots. The output |
779 | | * buffer must be deallocated by the caller. |
780 | | * |
781 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
782 | | **/ |
783 | | int |
784 | | idna_to_unicode_8zlz (const char *input, char **output, int flags) |
785 | 7.23k | { |
786 | 7.23k | char *utf8; |
787 | 7.23k | int rc; |
788 | | |
789 | 7.23k | rc = idna_to_unicode_8z8z (input, &utf8, flags); |
790 | 7.23k | if (rc != IDNA_SUCCESS) |
791 | 688 | return rc; |
792 | | |
793 | 6.54k | *output = stringprep_utf8_to_locale (utf8); |
794 | 6.54k | free (utf8); |
795 | | |
796 | 6.54k | if (!*output) |
797 | 2.74k | return IDNA_ICONV_ERROR; |
798 | | |
799 | 3.79k | return IDNA_SUCCESS; |
800 | 6.54k | } |
801 | | |
802 | | /** |
803 | | * idna_to_unicode_lzlz: |
804 | | * @input: zero-terminated string encoded in the current locale's |
805 | | * character set. |
806 | | * @output: pointer to newly allocated output string encoded in the |
807 | | * current locale's character set. |
808 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
809 | | * %IDNA_USE_STD3_ASCII_RULES. |
810 | | * |
811 | | * Convert possibly ACE encoded domain name in the locale's character |
812 | | * set into a string encoded in the current locale's character set. |
813 | | * The domain name may contain several labels, separated by dots. The |
814 | | * output buffer must be deallocated by the caller. |
815 | | * |
816 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
817 | | **/ |
818 | | int |
819 | | idna_to_unicode_lzlz (const char *input, char **output, int flags) |
820 | 5.23k | { |
821 | 5.23k | char *utf8; |
822 | 5.23k | int rc; |
823 | | |
824 | 5.23k | utf8 = stringprep_locale_to_utf8 (input); |
825 | 5.23k | if (!utf8) |
826 | 3.22k | return IDNA_ICONV_ERROR; |
827 | | |
828 | 2.00k | rc = idna_to_unicode_8zlz (utf8, output, flags); |
829 | 2.00k | free (utf8); |
830 | | |
831 | 2.00k | return rc; |
832 | 5.23k | } |
833 | | |
834 | | /** |
835 | | * IDNA_ACE_PREFIX |
836 | | * |
837 | | * The IANA allocated prefix to use for IDNA. "xn--" |
838 | | */ |
839 | | |
840 | | /** |
841 | | * Idna_rc: |
842 | | * @IDNA_SUCCESS: Successful operation. This value is guaranteed to |
843 | | * always be zero, the remaining ones are only guaranteed to hold |
844 | | * non-zero values, for logical comparison purposes. |
845 | | * @IDNA_STRINGPREP_ERROR: Error during string preparation. |
846 | | * @IDNA_PUNYCODE_ERROR: Error during punycode operation. |
847 | | * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that |
848 | | * the string contains non-LDH ASCII characters. |
849 | | * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility |
850 | | * with typo in earlier versions. |
851 | | * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that |
852 | | * the string contains a leading or trailing hyphen-minus (U+002D). |
853 | | * @IDNA_INVALID_LENGTH: The final output string is not within the |
854 | | * (inclusive) range 1 to 63 characters. |
855 | | * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix |
856 | | * (for ToUnicode). |
857 | | * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output |
858 | | * string does not equal the input. |
859 | | * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for |
860 | | * ToASCII). |
861 | | * @IDNA_ICONV_ERROR: Character encoding conversion error. |
862 | | * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a |
863 | | * fatal error). |
864 | | * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used |
865 | | * internally in libc). |
866 | | * |
867 | | * Enumerated return codes of idna_to_ascii_4i(), |
868 | | * idna_to_unicode_44i() functions (and functions derived from those |
869 | | * functions). The value 0 is guaranteed to always correspond to |
870 | | * success. |
871 | | */ |
872 | | |
873 | | |
874 | | /** |
875 | | * Idna_flags: |
876 | | * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned |
877 | | * Unicode code points. |
878 | | * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 |
879 | | * rules (i.e., normal host name rules). |
880 | | * |
881 | | * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. |
882 | | */ |