Line | Count | Source |
1 | | /* idna.c --- Implementation of the Internationalized Domain Name library. |
2 | | Copyright (C) 2002-2026 Simon Josefsson |
3 | | |
4 | | This file is part of GNU Libidn. |
5 | | |
6 | | GNU Libidn is free software: you can redistribute it and/or |
7 | | modify it under the terms of either: |
8 | | |
9 | | * the GNU Lesser General Public License as published by the Free |
10 | | Software Foundation; either version 3 of the License, or (at |
11 | | your option) any later version. |
12 | | |
13 | | or |
14 | | |
15 | | * the GNU General Public License as published by the Free |
16 | | Software Foundation; either version 2 of the License, or (at |
17 | | your option) any later version. |
18 | | |
19 | | or both in parallel, as here. |
20 | | |
21 | | GNU Libidn is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | | General Public License for more details. |
25 | | |
26 | | You should have received copies of the GNU General Public License and |
27 | | the GNU Lesser General Public License along with this program. If |
28 | | not, see <https://www.gnu.org/licenses/>. */ |
29 | | |
30 | | #ifdef HAVE_CONFIG_H |
31 | | # include "config.h" |
32 | | #endif |
33 | | |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | #include <stringprep.h> |
37 | | #include <punycode.h> |
38 | | |
39 | | #include "idna.h" |
40 | | |
41 | | /* Get c_strcasecmp. */ |
42 | | #include <c-strcase.h> |
43 | | |
/* Return non-zero if code point C is one of the four characters that
   must be recognized as a label-separating dot: U+002E (full stop),
   U+3002 (ideographic full stop), U+FF0E (fullwidth full stop) or
   U+FF61 (halfwidth ideographic full stop).

   Written as a function rather than a function-like macro so that the
   argument is evaluated exactly once, which makes calls such as
   DOTP (*p++) safe.  */
static int
DOTP (uint32_t c)
{
  return c == 0x002E || c == 0x3002 || c == 0xFF0E || c == 0xFF61;
}
46 | | |
47 | | /* Core functions */ |
48 | | |
49 | | /** |
50 | | * idna_to_ascii_4i: |
51 | | * @in: input array with unicode code points. |
52 | | * @inlen: length of input array with unicode code points. |
53 | | * @out: output zero terminated string that must have room for at |
54 | | * least 63 characters plus the terminating zero. |
55 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
56 | | * %IDNA_USE_STD3_ASCII_RULES. |
57 | | * |
58 | | * The ToASCII operation takes a sequence of Unicode code points that |
59 | | * make up one domain label and transforms it into a sequence of code |
60 | | * points in the ASCII range (0..7F). If ToASCII succeeds, the |
61 | | * original sequence and the resulting sequence are equivalent labels. |
62 | | * |
63 | | * It is important to note that the ToASCII operation can fail. ToASCII |
64 | | * fails if any step of it fails. If any step of the ToASCII operation |
65 | | * fails on any label in a domain name, that domain name MUST NOT be used |
66 | | * as an internationalized domain name. The method for dealing with this |
67 | | * failure is application-specific. |
68 | | * |
69 | | * The inputs to ToASCII are a sequence of code points, the AllowUnassigned |
70 | | * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a |
71 | | * sequence of ASCII code points or a failure condition. |
72 | | * |
73 | | * ToASCII never alters a sequence of code points that are all in the ASCII |
74 | | * range to begin with (although it could fail). Applying the ToASCII |
75 | | * operation multiple times has exactly the same effect as applying it just |
76 | | * once. |
77 | | * |
78 | | * Return value: Returns 0 on success, or an #Idna_rc error code. |
79 | | */ |
80 | | int |
81 | | idna_to_ascii_4i (const uint32_t *in, size_t inlen, char *out, int flags) |
82 | 10.6k | { |
83 | 10.6k | size_t len, outlen; |
84 | 10.6k | uint32_t *src; /* XXX don't need to copy data? */ |
85 | 10.6k | int rc; |
86 | | |
87 | | /* |
88 | | * ToASCII consists of the following steps: |
89 | | * |
90 | | * 1. If all code points in the sequence are in the ASCII range (0..7F) |
91 | | * then skip to step 3. |
92 | | */ |
93 | | |
94 | 10.6k | { |
95 | 10.6k | size_t i; |
96 | 10.6k | int inasciirange; |
97 | | |
98 | 10.6k | inasciirange = 1; |
99 | 81.5k | for (i = 0; i < inlen; i++) |
100 | 70.8k | if (in[i] > 0x7F) |
101 | 31.7k | inasciirange = 0; |
102 | 10.6k | if (inasciirange) |
103 | 2.82k | { |
104 | 2.82k | src = malloc (sizeof (in[0]) * (inlen + 1)); |
105 | 2.82k | if (src == NULL) |
106 | 0 | return IDNA_MALLOC_ERROR; |
107 | | |
108 | 2.82k | memcpy (src, in, sizeof (in[0]) * inlen); |
109 | 2.82k | src[inlen] = 0; |
110 | | |
111 | 2.82k | goto step3; |
112 | 2.82k | } |
113 | 10.6k | } |
114 | | |
115 | | /* |
116 | | * 2. Perform the steps specified in [NAMEPREP] and fail if there is |
117 | | * an error. The AllowUnassigned flag is used in [NAMEPREP]. |
118 | | */ |
119 | | |
120 | 7.82k | { |
121 | 7.82k | char *p; |
122 | | |
123 | 7.82k | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL); |
124 | 7.82k | if (p == NULL) |
125 | 1.51k | return IDNA_MALLOC_ERROR; |
126 | | |
127 | 6.30k | len = strlen (p); |
128 | 6.30k | do |
129 | 8.02k | { |
130 | 8.02k | char *newp; |
131 | | |
132 | 8.02k | len = 2 * len + 10; /* XXX better guess? */ |
133 | 8.02k | newp = realloc (p, len); |
134 | 8.02k | if (newp == NULL) |
135 | 0 | { |
136 | 0 | free (p); |
137 | 0 | return IDNA_MALLOC_ERROR; |
138 | 0 | } |
139 | 8.02k | p = newp; |
140 | | |
141 | 8.02k | if (flags & IDNA_ALLOW_UNASSIGNED) |
142 | 3.83k | rc = stringprep_nameprep (p, len); |
143 | 4.19k | else |
144 | 4.19k | rc = stringprep_nameprep_no_unassigned (p, len); |
145 | 8.02k | } |
146 | 8.02k | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
147 | | |
148 | 6.30k | if (rc != STRINGPREP_OK) |
149 | 1.77k | { |
150 | 1.77k | free (p); |
151 | 1.77k | return IDNA_STRINGPREP_ERROR; |
152 | 1.77k | } |
153 | | |
154 | 4.53k | src = stringprep_utf8_to_ucs4 (p, -1, NULL); |
155 | | |
156 | 4.53k | free (p); |
157 | | |
158 | 4.53k | if (!src) |
159 | 0 | return IDNA_MALLOC_ERROR; |
160 | 4.53k | } |
161 | | |
162 | 7.35k | step3: |
163 | | /* |
164 | | * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: |
165 | | * |
166 | | * (a) Verify the absence of non-LDH ASCII code points; that is, |
167 | | * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. |
168 | | * |
169 | | * (b) Verify the absence of leading and trailing hyphen-minus; |
170 | | * that is, the absence of U+002D at the beginning and end of |
171 | | * the sequence. |
172 | | */ |
173 | | |
174 | 7.35k | if (flags & IDNA_USE_STD3_ASCII_RULES) |
175 | 3.64k | { |
176 | 3.64k | size_t i; |
177 | | |
178 | 19.3k | for (i = 0; src[i]; i++) |
179 | 16.5k | if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || |
180 | 15.9k | (src[i] >= 0x3A && src[i] <= 0x40) || |
181 | 15.9k | (src[i] >= 0x5B && src[i] <= 0x60) || |
182 | 15.8k | (src[i] >= 0x7B && src[i] <= 0x7F)) |
183 | 784 | { |
184 | 784 | free (src); |
185 | 784 | return IDNA_CONTAINS_NON_LDH; |
186 | 784 | } |
187 | | |
188 | 2.86k | if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) |
189 | 47 | { |
190 | 47 | free (src); |
191 | 47 | return IDNA_CONTAINS_MINUS; |
192 | 47 | } |
193 | 2.86k | } |
194 | | |
195 | | /* |
196 | | * 4. If all code points in the sequence are in the ASCII range |
197 | | * (0..7F), then skip to step 8. |
198 | | */ |
199 | | |
200 | 6.52k | { |
201 | 6.52k | size_t i; |
202 | 6.52k | int inasciirange; |
203 | | |
204 | 6.52k | inasciirange = 1; |
205 | 98.5k | for (i = 0; src[i]; i++) |
206 | 92.0k | { |
207 | 92.0k | if (src[i] > 0x7F) |
208 | 58.4k | inasciirange = 0; |
209 | | /* copy string to output buffer if we are about to skip to step8 */ |
210 | 92.0k | if (i < 64) |
211 | 30.8k | out[i] = src[i]; |
212 | 92.0k | } |
213 | 6.52k | if (i < 64) |
214 | 6.41k | out[i] = '\0'; |
215 | 111 | else |
216 | 111 | { |
217 | 111 | free (src); |
218 | 111 | return IDNA_INVALID_LENGTH; |
219 | 111 | } |
220 | 6.41k | if (inasciirange) |
221 | 2.43k | goto step8; |
222 | 6.41k | } |
223 | | |
224 | | /* |
225 | | * 5. Verify that the sequence does NOT begin with the ACE prefix. |
226 | | * |
227 | | */ |
228 | | |
229 | 3.98k | { |
230 | 3.98k | size_t i; |
231 | 3.98k | int match; |
232 | | |
233 | 3.98k | match = 1; |
234 | 8.42k | for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) |
235 | 4.44k | if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) |
236 | 3.96k | match = 0; |
237 | 3.98k | if (match) |
238 | 18 | { |
239 | 18 | free (src); |
240 | 18 | return IDNA_CONTAINS_ACE_PREFIX; |
241 | 18 | } |
242 | 3.98k | } |
243 | | |
244 | | /* |
245 | | * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] |
246 | | * and fail if there is an error. |
247 | | */ |
248 | 24.2k | for (len = 0; src[len]; len++) |
249 | 20.3k | ; |
250 | 3.96k | src[len] = '\0'; |
251 | 3.96k | outlen = 63 - strlen (IDNA_ACE_PREFIX); |
252 | 3.96k | rc = punycode_encode (len, src, NULL, |
253 | 3.96k | &outlen, &out[strlen (IDNA_ACE_PREFIX)]); |
254 | 3.96k | if (rc != PUNYCODE_SUCCESS) |
255 | 55 | { |
256 | 55 | free (src); |
257 | 55 | return IDNA_PUNYCODE_ERROR; |
258 | 55 | } |
259 | 3.90k | out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; |
260 | | |
261 | | /* |
262 | | * 7. Prepend the ACE prefix. |
263 | | */ |
264 | | |
265 | 3.90k | memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); |
266 | | |
267 | | /* |
268 | | * 8. Verify that the number of code points is in the range 1 to 63 |
269 | | * inclusive (0 is excluded). |
270 | | */ |
271 | | |
272 | 6.34k | step8: |
273 | 6.34k | free (src); |
274 | 6.34k | if (strlen (out) < 1) |
275 | 205 | return IDNA_INVALID_LENGTH; |
276 | | |
277 | 6.13k | return IDNA_SUCCESS; |
278 | 6.34k | } |
279 | | |
280 | | /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */ |
281 | | static int |
282 | | idna_to_unicode_internal (char *utf8in, |
283 | | uint32_t *out, size_t *outlen, int flags) |
284 | 0 | { |
285 | 0 | int rc; |
286 | 0 | char tmpout[64]; |
287 | 0 | size_t utf8len = strlen (utf8in) + 1; |
288 | 0 | size_t addlen = 0, addinc = utf8len / 10 + 1; |
289 | | |
290 | | /* |
291 | | * ToUnicode consists of the following steps: |
292 | | * |
293 | | * 1. If the sequence contains any code points outside the ASCII range |
294 | | * (0..7F) then proceed to step 2, otherwise skip to step 3. |
295 | | */ |
296 | |
|
297 | 0 | { |
298 | 0 | size_t i; |
299 | 0 | int inasciirange; |
300 | |
|
301 | 0 | inasciirange = 1; |
302 | 0 | for (i = 0; utf8in[i]; i++) |
303 | 0 | if (utf8in[i] & ~0x7F) |
304 | 0 | inasciirange = 0; |
305 | 0 | if (inasciirange) |
306 | 0 | goto step3; |
307 | 0 | } |
308 | | |
309 | | /* |
310 | | * 2. Perform the steps specified in [NAMEPREP] and fail if there is an |
311 | | * error. (If step 3 of ToASCII is also performed here, it will not |
312 | | * affect the overall behavior of ToUnicode, but it is not |
313 | | * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. |
314 | | */ |
315 | 0 | do |
316 | 0 | { |
317 | 0 | char *newp = realloc (utf8in, utf8len + addlen); |
318 | 0 | if (newp == NULL) |
319 | 0 | { |
320 | 0 | free (utf8in); |
321 | 0 | return IDNA_MALLOC_ERROR; |
322 | 0 | } |
323 | 0 | utf8in = newp; |
324 | 0 | if (flags & IDNA_ALLOW_UNASSIGNED) |
325 | 0 | rc = stringprep_nameprep (utf8in, utf8len + addlen); |
326 | 0 | else |
327 | 0 | rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen); |
328 | 0 | addlen += addinc; |
329 | 0 | addinc *= 2; |
330 | 0 | } |
331 | 0 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); |
332 | | |
333 | 0 | if (rc != STRINGPREP_OK) |
334 | 0 | { |
335 | 0 | free (utf8in); |
336 | 0 | return IDNA_STRINGPREP_ERROR; |
337 | 0 | } |
338 | | |
339 | | /* 3. Verify that the sequence begins with the ACE prefix, and save a |
340 | | * copy of the sequence. |
341 | | * ... The ToASCII and ToUnicode operations MUST recognize the ACE |
342 | | prefix in a case-insensitive manner. |
343 | | */ |
344 | | |
345 | 0 | step3: |
346 | 0 | if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0) |
347 | 0 | { |
348 | 0 | free (utf8in); |
349 | 0 | return IDNA_NO_ACE_PREFIX; |
350 | 0 | } |
351 | | |
352 | | /* 4. Remove the ACE prefix. |
353 | | */ |
354 | | |
355 | 0 | memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)], |
356 | 0 | strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1); |
357 | | |
358 | | /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] |
359 | | * and fail if there is an error. Save a copy of the result of |
360 | | * this step. |
361 | | */ |
362 | |
|
363 | 0 | (*outlen)--; /* reserve one for the zero */ |
364 | |
|
365 | 0 | rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); |
366 | 0 | if (rc != PUNYCODE_SUCCESS) |
367 | 0 | { |
368 | 0 | free (utf8in); |
369 | 0 | return IDNA_PUNYCODE_ERROR; |
370 | 0 | } |
371 | | |
372 | 0 | out[*outlen] = 0; /* add zero */ |
373 | | |
374 | | /* 6. Apply ToASCII. |
375 | | */ |
376 | |
|
377 | 0 | rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); |
378 | 0 | if (rc != IDNA_SUCCESS) |
379 | 0 | { |
380 | 0 | free (utf8in); |
381 | 0 | return rc; |
382 | 0 | } |
383 | | |
384 | | /* 7. Verify that the result of step 6 matches the saved copy from |
385 | | * step 3, using a case-insensitive ASCII comparison. |
386 | | */ |
387 | | |
388 | 0 | if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0 |
389 | 0 | || c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) |
390 | 0 | { |
391 | 0 | free (utf8in); |
392 | 0 | return IDNA_ROUNDTRIP_VERIFY_ERROR; |
393 | 0 | } |
394 | | |
395 | | /* 8. Return the saved copy from step 5. |
396 | | */ |
397 | | |
398 | 0 | free (utf8in); |
399 | 0 | return IDNA_SUCCESS; |
400 | 0 | } |
401 | | |
402 | | /** |
403 | | * idna_to_unicode_44i: |
404 | | * @in: input array with unicode code points. |
405 | | * @inlen: length of input array with unicode code points. |
406 | | * @out: output array with unicode code points. |
407 | | * @outlen: on input, maximum size of output array with unicode code points, |
408 | | * on exit, actual size of output array with unicode code points. |
409 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
410 | | * %IDNA_USE_STD3_ASCII_RULES. |
411 | | * |
412 | | * The ToUnicode operation takes a sequence of Unicode code points |
413 | | * that make up one domain label and returns a sequence of Unicode |
414 | | * code points. If the input sequence is a label in ACE form, then the |
415 | | * result is an equivalent internationalized label that is not in ACE |
416 | | * form, otherwise the original sequence is returned unaltered. |
417 | | * |
418 | | * ToUnicode never fails. If any step fails, then the original input |
419 | | * sequence is returned immediately in that step. |
420 | | * |
421 | | * The Punycode decoder can never output more code points than it |
422 | | * inputs, but Nameprep can, and therefore ToUnicode can. Note that |
423 | | * the number of octets needed to represent a sequence of code points |
424 | | * depends on the particular character encoding used. |
425 | | * |
426 | | * The inputs to ToUnicode are a sequence of code points, the |
427 | | * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of |
428 | | * ToUnicode is always a sequence of Unicode code points. |
429 | | * |
430 | | * Return value: Returns #Idna_rc error condition, but it must only be |
431 | | * used for debugging purposes. The output buffer is always |
432 | | * guaranteed to contain the correct data according to the |
433 | | * specification (sans malloc induced errors). NB! This means that |
434 | | * you normally ignore the return code from this function, as |
435 | | * checking it means breaking the standard. |
436 | | */ |
437 | | int |
438 | | idna_to_unicode_44i (const uint32_t *in, size_t inlen, |
439 | | uint32_t *out, size_t *outlen, int flags) |
440 | 0 | { |
441 | 0 | int rc; |
442 | 0 | size_t outlensave = *outlen; |
443 | 0 | char *p; |
444 | |
|
445 | 0 | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL); |
446 | 0 | if (p == NULL) |
447 | 0 | return IDNA_MALLOC_ERROR; |
448 | | |
449 | 0 | rc = idna_to_unicode_internal (p, out, outlen, flags); |
450 | 0 | if (rc != IDNA_SUCCESS) |
451 | 0 | { |
452 | 0 | memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? |
453 | 0 | inlen : outlensave)); |
454 | 0 | *outlen = inlen; |
455 | 0 | } |
456 | | |
457 | | /* p is freed in idna_to_unicode_internal. */ |
458 | |
|
459 | 0 | return rc; |
460 | 0 | } |
461 | | |
462 | | /* Wrappers that handle several labels */ |
463 | | |
464 | | /** |
465 | | * idna_to_ascii_4z: |
466 | | * @input: zero terminated input Unicode string. |
467 | | * @output: pointer to newly allocated output string. |
468 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
469 | | * %IDNA_USE_STD3_ASCII_RULES. |
470 | | * |
471 | | * Convert UCS-4 domain name to ASCII string. The domain name may |
472 | | * contain several labels, separated by dots. The output buffer must |
473 | | * be deallocated by the caller. |
474 | | * |
475 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
476 | | **/ |
477 | | int |
478 | | idna_to_ascii_4z (const uint32_t *input, char **output, int flags) |
479 | 5.41k | { |
480 | 5.41k | const uint32_t *start = input; |
481 | 5.41k | const uint32_t *end; |
482 | 5.41k | char buf[64]; |
483 | 5.41k | char *out = NULL; |
484 | 5.41k | int rc; |
485 | | |
486 | | /* 1) Whenever dots are used as label separators, the following |
487 | | characters MUST be recognized as dots: U+002E (full stop), |
488 | | U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), |
489 | | U+FF61 (halfwidth ideographic full stop). */ |
490 | | |
491 | 5.41k | if (input[0] == 0) |
492 | 154 | { |
493 | | /* Handle implicit zero-length root label. */ |
494 | 154 | *output = malloc (1); |
495 | 154 | if (!*output) |
496 | 0 | return IDNA_MALLOC_ERROR; |
497 | 154 | strcpy (*output, ""); |
498 | 154 | return IDNA_SUCCESS; |
499 | 154 | } |
500 | | |
501 | 5.26k | if (DOTP (input[0]) && input[1] == 0) |
502 | 40 | { |
503 | | /* Handle explicit zero-length root label. */ |
504 | 40 | *output = malloc (2); |
505 | 40 | if (!*output) |
506 | 0 | return IDNA_MALLOC_ERROR; |
507 | 40 | strcpy (*output, "."); |
508 | 40 | return IDNA_SUCCESS; |
509 | 40 | } |
510 | | |
511 | 5.22k | *output = NULL; |
512 | 5.22k | do |
513 | 9.04k | { |
514 | 9.04k | end = start; |
515 | | |
516 | 72.5k | for (; *end && !DOTP (*end); end++) |
517 | 63.5k | ; |
518 | | |
519 | 9.04k | if (*end == '\0' && start == end) |
520 | 43 | { |
521 | | /* Handle explicit zero-length root label. */ |
522 | 43 | buf[0] = '\0'; |
523 | 43 | } |
524 | 8.99k | else |
525 | 8.99k | { |
526 | 8.99k | rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags); |
527 | 8.99k | if (rc != IDNA_SUCCESS) |
528 | 3.11k | { |
529 | 3.11k | free (out); |
530 | 3.11k | return rc; |
531 | 3.11k | } |
532 | 8.99k | } |
533 | | |
534 | 5.92k | if (out) |
535 | 3.67k | { |
536 | 3.67k | size_t l = strlen (out) + 1 + strlen (buf) + 1; |
537 | 3.67k | char *newp = realloc (out, l); |
538 | 3.67k | if (!newp) |
539 | 0 | { |
540 | 0 | free (out); |
541 | 0 | return IDNA_MALLOC_ERROR; |
542 | 0 | } |
543 | 3.67k | out = newp; |
544 | 3.67k | strcat (out, "."); |
545 | 3.67k | strcat (out, buf); |
546 | 3.67k | } |
547 | 2.25k | else |
548 | 2.25k | { |
549 | 2.25k | out = strdup (buf); |
550 | 2.25k | if (!out) |
551 | 0 | return IDNA_MALLOC_ERROR; |
552 | 2.25k | } |
553 | | |
554 | 5.92k | start = end + 1; |
555 | 5.92k | } |
556 | 5.92k | while (*end); |
557 | | |
558 | 2.10k | *output = out; |
559 | | |
560 | 2.10k | return IDNA_SUCCESS; |
561 | 5.22k | } |
562 | | |
563 | | /** |
564 | | * idna_to_ascii_8z: |
565 | | * @input: zero terminated input UTF-8 string. |
566 | | * @output: pointer to newly allocated output string. |
567 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
568 | | * %IDNA_USE_STD3_ASCII_RULES. |
569 | | * |
570 | | * Convert UTF-8 domain name to ASCII string. The domain name may |
571 | | * contain several labels, separated by dots. The output buffer must |
572 | | * be deallocated by the caller. |
573 | | * |
574 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
575 | | **/ |
576 | | int |
577 | | idna_to_ascii_8z (const char *input, char **output, int flags) |
578 | 4.42k | { |
579 | 4.42k | uint32_t *ucs4; |
580 | 4.42k | size_t ucs4len; |
581 | 4.42k | int rc; |
582 | | |
583 | 4.42k | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); |
584 | 4.42k | if (!ucs4) |
585 | 656 | return IDNA_ICONV_ERROR; |
586 | | |
587 | 3.77k | rc = idna_to_ascii_4z (ucs4, output, flags); |
588 | | |
589 | 3.77k | free (ucs4); |
590 | | |
591 | 3.77k | return rc; |
592 | | |
593 | 4.42k | } |
594 | | |
595 | | /** |
596 | | * idna_to_ascii_lz: |
597 | | * @input: zero terminated input string encoded in the current locale's |
598 | | * character set. |
599 | | * @output: pointer to newly allocated output string. |
600 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
601 | | * %IDNA_USE_STD3_ASCII_RULES. |
602 | | * |
603 | | * Convert domain name in the locale's encoding to ASCII string. The |
604 | | * domain name may contain several labels, separated by dots. The |
605 | | * output buffer must be deallocated by the caller. |
606 | | * |
607 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
608 | | **/ |
609 | | int |
610 | | idna_to_ascii_lz (const char *input, char **output, int flags) |
611 | 3.88k | { |
612 | 3.88k | char *utf8; |
613 | 3.88k | int rc; |
614 | | |
615 | 3.88k | utf8 = stringprep_locale_to_utf8 (input); |
616 | 3.88k | if (!utf8) |
617 | 3.33k | return IDNA_ICONV_ERROR; |
618 | | |
619 | 546 | rc = idna_to_ascii_8z (utf8, output, flags); |
620 | | |
621 | 546 | free (utf8); |
622 | | |
623 | 546 | return rc; |
624 | 3.88k | } |
625 | | |
626 | | /** |
627 | | * idna_to_unicode_4z4z: |
628 | | * @input: zero-terminated Unicode string. |
629 | | * @output: pointer to newly allocated output Unicode string. |
630 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
631 | | * %IDNA_USE_STD3_ASCII_RULES. |
632 | | * |
633 | | * Convert possibly ACE encoded domain name in UCS-4 format into a |
634 | | * UCS-4 string. The domain name may contain several labels, |
635 | | * separated by dots. The output buffer must be deallocated by the |
636 | | * caller. |
637 | | * |
638 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
639 | | **/ |
640 | | int |
641 | | idna_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags) |
642 | 0 | { |
643 | 0 | const uint32_t *start = input; |
644 | 0 | const uint32_t *end; |
645 | 0 | uint32_t *buf; |
646 | 0 | size_t buflen; |
647 | 0 | uint32_t *out = NULL; |
648 | 0 | size_t outlen = 0; |
649 | 0 | int rc; |
650 | |
|
651 | 0 | *output = NULL; |
652 | |
|
653 | 0 | do |
654 | 0 | { |
655 | 0 | end = start; |
656 | |
|
657 | 0 | for (; *end && !DOTP (*end); end++) |
658 | 0 | ; |
659 | |
|
660 | 0 | buflen = (size_t) (end - start); |
661 | 0 | buf = malloc (sizeof (buf[0]) * (buflen + 1)); |
662 | 0 | if (!buf) |
663 | 0 | { |
664 | 0 | free (out); |
665 | 0 | return IDNA_MALLOC_ERROR; |
666 | 0 | } |
667 | | |
668 | | /* don't check for non-malloc return codes as per |
669 | | specification! */ |
670 | 0 | rc = idna_to_unicode_44i (start, (size_t) (end - start), |
671 | 0 | buf, &buflen, flags); |
672 | 0 | if (rc == IDNA_MALLOC_ERROR) |
673 | 0 | { |
674 | 0 | free (out); |
675 | 0 | return IDNA_MALLOC_ERROR; |
676 | 0 | } |
677 | | |
678 | 0 | if (out) |
679 | 0 | { |
680 | 0 | uint32_t *newp = realloc (out, |
681 | 0 | sizeof (out[0]) |
682 | 0 | * (outlen + 1 + buflen + 1)); |
683 | 0 | if (!newp) |
684 | 0 | { |
685 | 0 | free (buf); |
686 | 0 | free (out); |
687 | 0 | return IDNA_MALLOC_ERROR; |
688 | 0 | } |
689 | 0 | out = newp; |
690 | 0 | out[outlen++] = 0x002E; /* '.' (full stop) */ |
691 | 0 | memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); |
692 | 0 | outlen += buflen; |
693 | 0 | out[outlen] = 0x0; |
694 | 0 | free (buf); |
695 | 0 | } |
696 | 0 | else |
697 | 0 | { |
698 | 0 | out = buf; |
699 | 0 | outlen = buflen; |
700 | 0 | out[outlen] = 0x0; |
701 | 0 | } |
702 | | |
703 | 0 | start = end + 1; |
704 | 0 | } |
705 | 0 | while (*end); |
706 | | |
707 | 0 | *output = out; |
708 | |
|
709 | 0 | return IDNA_SUCCESS; |
710 | 0 | } |
711 | | |
712 | | /** |
713 | | * idna_to_unicode_8z4z: |
714 | | * @input: zero-terminated UTF-8 string. |
715 | | * @output: pointer to newly allocated output Unicode string. |
716 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
717 | | * %IDNA_USE_STD3_ASCII_RULES. |
718 | | * |
719 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
720 | | * UCS-4 string. The domain name may contain several labels, |
721 | | * separated by dots. The output buffer must be deallocated by the |
722 | | * caller. |
723 | | * |
724 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
725 | | **/ |
726 | | int |
727 | | idna_to_unicode_8z4z (const char *input, uint32_t **output, int flags) |
728 | 0 | { |
729 | 0 | uint32_t *ucs4; |
730 | 0 | size_t ucs4len; |
731 | 0 | int rc; |
732 | |
|
733 | 0 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); |
734 | 0 | if (!ucs4) |
735 | 0 | return IDNA_ICONV_ERROR; |
736 | | |
737 | 0 | rc = idna_to_unicode_4z4z (ucs4, output, flags); |
738 | 0 | free (ucs4); |
739 | |
|
740 | 0 | return rc; |
741 | 0 | } |
742 | | |
743 | | /** |
744 | | * idna_to_unicode_8z8z: |
745 | | * @input: zero-terminated UTF-8 string. |
746 | | * @output: pointer to newly allocated output UTF-8 string. |
747 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
748 | | * %IDNA_USE_STD3_ASCII_RULES. |
749 | | * |
750 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
751 | | * UTF-8 string. The domain name may contain several labels, |
752 | | * separated by dots. The output buffer must be deallocated by the |
753 | | * caller. |
754 | | * |
755 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
756 | | **/ |
757 | | int |
758 | | idna_to_unicode_8z8z (const char *input, char **output, int flags) |
759 | 0 | { |
760 | 0 | uint32_t *ucs4; |
761 | 0 | int rc; |
762 | |
|
763 | 0 | rc = idna_to_unicode_8z4z (input, &ucs4, flags); |
764 | 0 | if (rc != IDNA_SUCCESS) |
765 | 0 | return rc; |
766 | | |
767 | 0 | *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL); |
768 | 0 | free (ucs4); |
769 | |
|
770 | 0 | if (!*output) |
771 | 0 | return IDNA_ICONV_ERROR; |
772 | | |
773 | 0 | return IDNA_SUCCESS; |
774 | 0 | } |
775 | | |
776 | | /** |
777 | | * idna_to_unicode_8zlz: |
778 | | * @input: zero-terminated UTF-8 string. |
779 | | * @output: pointer to newly allocated output string encoded in the |
780 | | * current locale's character set. |
781 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
782 | | * %IDNA_USE_STD3_ASCII_RULES. |
783 | | * |
784 | | * Convert possibly ACE encoded domain name in UTF-8 format into a |
785 | | * string encoded in the current locale's character set. The domain |
786 | | * name may contain several labels, separated by dots. The output |
787 | | * buffer must be deallocated by the caller. |
788 | | * |
789 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
790 | | **/ |
791 | | int |
792 | | idna_to_unicode_8zlz (const char *input, char **output, int flags) |
793 | 0 | { |
794 | 0 | char *utf8; |
795 | 0 | int rc; |
796 | |
|
797 | 0 | rc = idna_to_unicode_8z8z (input, &utf8, flags); |
798 | 0 | if (rc != IDNA_SUCCESS) |
799 | 0 | return rc; |
800 | | |
801 | 0 | *output = stringprep_utf8_to_locale (utf8); |
802 | 0 | free (utf8); |
803 | |
|
804 | 0 | if (!*output) |
805 | 0 | return IDNA_ICONV_ERROR; |
806 | | |
807 | 0 | return IDNA_SUCCESS; |
808 | 0 | } |
809 | | |
810 | | /** |
811 | | * idna_to_unicode_lzlz: |
812 | | * @input: zero-terminated string encoded in the current locale's |
813 | | * character set. |
814 | | * @output: pointer to newly allocated output string encoded in the |
815 | | * current locale's character set. |
816 | | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or |
817 | | * %IDNA_USE_STD3_ASCII_RULES. |
818 | | * |
819 | | * Convert possibly ACE encoded domain name in the locale's character |
820 | | * set into a string encoded in the current locale's character set. |
821 | | * The domain name may contain several labels, separated by dots. The |
822 | | * output buffer must be deallocated by the caller. |
823 | | * |
824 | | * Return value: Returns %IDNA_SUCCESS on success, or error code. |
825 | | **/ |
826 | | int |
827 | | idna_to_unicode_lzlz (const char *input, char **output, int flags) |
828 | 0 | { |
829 | 0 | char *utf8; |
830 | 0 | int rc; |
831 | |
|
832 | 0 | utf8 = stringprep_locale_to_utf8 (input); |
833 | 0 | if (!utf8) |
834 | 0 | return IDNA_ICONV_ERROR; |
835 | | |
836 | 0 | rc = idna_to_unicode_8zlz (utf8, output, flags); |
837 | 0 | free (utf8); |
838 | |
|
839 | 0 | return rc; |
840 | 0 | } |
841 | | |
842 | | /** |
843 | | * IDNA_ACE_PREFIX |
844 | | * |
845 | | * The IANA allocated prefix to use for IDNA. "xn--" |
846 | | */ |
847 | | |
848 | | /** |
849 | | * Idna_rc: |
850 | | * @IDNA_SUCCESS: Successful operation. This value is guaranteed to |
851 | | * always be zero, the remaining ones are only guaranteed to hold |
852 | | * non-zero values, for logical comparison purposes. |
853 | | * @IDNA_STRINGPREP_ERROR: Error during string preparation. |
854 | | * @IDNA_PUNYCODE_ERROR: Error during punycode operation. |
855 | | * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that |
856 | | * the string contains non-LDH ASCII characters. |
857 | | * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility |
858 | | * with typo in earlier versions. |
859 | | * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that |
860 | | * the string contains a leading or trailing hyphen-minus (U+002D). |
861 | | * @IDNA_INVALID_LENGTH: The final output string is not within the |
862 | | * (inclusive) range 1 to 63 characters. |
863 | | * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix |
864 | | * (for ToUnicode). |
865 | | * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output |
866 | | * string does not equal the input. |
867 | | * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for |
868 | | * ToASCII). |
869 | | * @IDNA_ICONV_ERROR: Character encoding conversion error. |
870 | | * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a |
871 | | * fatal error). |
872 | | * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used |
873 | | * internally in libc). |
874 | | * |
875 | | * Enumerated return codes of idna_to_ascii_4i(), |
876 | | * idna_to_unicode_44i() functions (and functions derived from those |
877 | | * functions). The value 0 is guaranteed to always correspond to |
878 | | * success. |
879 | | */ |
880 | | |
881 | | |
882 | | /** |
883 | | * Idna_flags: |
884 | | * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned |
885 | | * Unicode code points. |
886 | | * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 |
887 | | * rules (i.e., normal host name rules). |
888 | | * |
889 | | * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. |
890 | | */ |