/src/libidn2/lib/lookup.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* lookup.c - implementation of IDNA2008 lookup functions |
2 | | Copyright (C) 2011-2022 Simon Josefsson |
3 | | Copyright (C) 2017-2022 Tim Ruehsen |
4 | | |
5 | | Libidn2 is free software: you can redistribute it and/or modify it |
6 | | under the terms of either: |
7 | | |
8 | | * the GNU Lesser General Public License as published by the Free |
9 | | Software Foundation; either version 3 of the License, or (at |
10 | | your option) any later version. |
11 | | |
12 | | or |
13 | | |
14 | | * the GNU General Public License as published by the Free |
15 | | Software Foundation; either version 2 of the License, or (at |
16 | | your option) any later version. |
17 | | |
18 | | or both in parallel, as here. |
19 | | |
20 | | This program is distributed in the hope that it will be useful, |
21 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | | GNU General Public License for more details. |
24 | | |
25 | | You should have received copies of the GNU General Public License and |
26 | | the GNU Lesser General Public License along with this program. If |
27 | | not, see <http://www.gnu.org/licenses/>. |
28 | | */ |
29 | | |
30 | | #include <config.h> |
31 | | |
32 | | #include "idn2.h" |
33 | | |
34 | | #include <errno.h> /* errno */ |
35 | | #include <stdlib.h> /* malloc, free */ |
36 | | |
37 | | #include "punycode.h" |
38 | | |
39 | | #include <unitypes.h> |
40 | | #include <uniconv.h> /* u8_strconv_from_locale */ |
41 | | #include <uninorm.h> /* u32_normalize */ |
42 | | #include <unistr.h> /* u8_to_u32 */ |
43 | | |
44 | | #include "idna.h" /* _idn2_label_test */ |
45 | | #include "tr46map.h" /* definition for tr46map.c */ |
46 | | |
47 | | #ifdef HAVE_LIBUNISTRING |
48 | | /* copied from gnulib */ |
49 | | # include <limits.h> |
/* Expands to one `case' label per letter 'a'..'z', each shifted by N.
   The trailing colon of the last label is supplied at the point of use.  */
# define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
/* The same set of labels shifted onto the uppercase letters 'A'..'Z'.  */
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Locale-independent tolower(): map 'A'..'Z' to 'a'..'z' and return
   every other value of C unchanged.  */
static inline int
c_tolower (int c)
{
  int result = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      result = c - 'A' + 'a';
      break;
    default:
      break;
    }

  return result;
}
70 | | |
/* Compare at most N bytes of S1 and S2, folding ASCII uppercase letters
   to lowercase with c_tolower() first.  The scan stops after N bytes, at
   the first pair of bytes that differ, or when S1 reaches its NUL.
   Returns 0 if the compared prefixes are equal, a negative value if S1
   orders before S2 and a positive value otherwise.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *left = (const unsigned char *) s1;
  const unsigned char *right = (const unsigned char *) s2;
  unsigned char lc, rc;

  /* Same pointer or nothing to compare: equal, without reading memory.  */
  if (n == 0 || left == right)
    return 0;

  for (;;)
    {
      lc = c_tolower (*left);
      rc = c_tolower (*right);

      n--;
      if (n == 0 || lc == '\0' || lc != rc)
	break;

      left++;
      right++;
    }

  if (UCHAR_MAX <= INT_MAX)
    return lc - rc;

  /* On machines where 'char' and 'int' have the same size the difference
     of two 'unsigned char' values does not fit in an 'int', so only the
     sign of the comparison is reported.  */
  return (lc > rc) - (lc < rc);
}
102 | | #else |
103 | | # include <c-strcase.h> |
104 | | #endif |
105 | | |
106 | | static int |
107 | | set_default_flags (int *flags) |
108 | 0 | { |
109 | 0 | if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL)) |
110 | 0 | return IDN2_INVALID_FLAGS; |
111 | | |
112 | 0 | if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL)) |
113 | 0 | && ((*flags) & IDN2_NO_TR46)) |
114 | 0 | return IDN2_INVALID_FLAGS; |
115 | | |
116 | 0 | if (((*flags) & IDN2_ALABEL_ROUNDTRIP) |
117 | 0 | && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP)) |
118 | 0 | return IDN2_INVALID_FLAGS; |
119 | | |
120 | 0 | if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL))) |
121 | 0 | *flags |= IDN2_NONTRANSITIONAL; |
122 | |
|
123 | 0 | return IDN2_OK; |
124 | 0 | } |
125 | | |
126 | | static int |
127 | | label (const uint8_t * src, size_t srclen, uint8_t * dst, size_t *dstlen, |
128 | | int flags) |
129 | 0 | { |
130 | 0 | size_t plen; |
131 | 0 | uint32_t *p = NULL; |
132 | 0 | const uint8_t *src_org = NULL; |
133 | 0 | uint8_t *src_allocated = NULL; |
134 | 0 | int rc, check_roundtrip = 0; |
135 | 0 | size_t tmpl, srclen_org = 0; |
136 | 0 | uint32_t label_u32[IDN2_LABEL_MAX_LENGTH]; |
137 | 0 | size_t label32_len = IDN2_LABEL_MAX_LENGTH; |
138 | |
|
139 | 0 | if (_idn2_ascii_p (src, srclen)) |
140 | 0 | { |
141 | 0 | if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4 |
142 | 0 | && memcmp (src, "xn--", 4) == 0) |
143 | 0 | { |
144 | | /* |
145 | | If the input to this procedure appears to be an A-label |
146 | | (i.e., it starts in "xn--", interpreted |
147 | | case-insensitively), the lookup application MAY attempt to |
148 | | convert it to a U-label, first ensuring that the A-label is |
149 | | entirely in lowercase (converting it to lowercase if |
150 | | necessary), and apply the tests of Section 5.4 and the |
151 | | conversion of Section 5.5 to that form. */ |
152 | 0 | rc = |
153 | 0 | _idn2_punycode_decode_internal (srclen - 4, (char *) src + 4, |
154 | 0 | &label32_len, label_u32); |
155 | 0 | if (rc) |
156 | 0 | return rc; |
157 | | |
158 | 0 | check_roundtrip = 1; |
159 | 0 | src_org = src; |
160 | 0 | srclen_org = srclen; |
161 | |
|
162 | 0 | srclen = IDN2_LABEL_MAX_LENGTH; |
163 | 0 | src = src_allocated = |
164 | 0 | u32_to_u8 (label_u32, label32_len, NULL, &srclen); |
165 | 0 | if (!src) |
166 | 0 | { |
167 | 0 | if (errno == ENOMEM) |
168 | 0 | return IDN2_MALLOC; |
169 | 0 | return IDN2_ENCODING_ERROR; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | else |
173 | 0 | { |
174 | 0 | if (srclen > IDN2_LABEL_MAX_LENGTH) |
175 | 0 | return IDN2_TOO_BIG_LABEL; |
176 | 0 | if (srclen > *dstlen) |
177 | 0 | return IDN2_TOO_BIG_DOMAIN; |
178 | | |
179 | 0 | memcpy (dst, src, srclen); |
180 | 0 | *dstlen = srclen; |
181 | 0 | return IDN2_OK; |
182 | 0 | } |
183 | 0 | } |
184 | | |
185 | 0 | rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT); |
186 | 0 | if (rc != IDN2_OK) |
187 | 0 | goto out; |
188 | | |
189 | 0 | if (!(flags & IDN2_TRANSITIONAL)) |
190 | 0 | { |
191 | 0 | rc = _idn2_label_test (TEST_NFC | |
192 | 0 | TEST_2HYPHEN | |
193 | 0 | TEST_LEADING_COMBINING | |
194 | 0 | TEST_DISALLOWED | |
195 | 0 | TEST_CONTEXTJ_RULE | |
196 | 0 | TEST_CONTEXTO_WITH_RULE | |
197 | 0 | TEST_UNASSIGNED | TEST_BIDI | |
198 | 0 | ((flags & IDN2_NONTRANSITIONAL) ? |
199 | 0 | TEST_NONTRANSITIONAL : 0) | ((flags & |
200 | 0 | IDN2_USE_STD3_ASCII_RULES) |
201 | 0 | ? 0 : |
202 | 0 | TEST_ALLOW_STD3_DISALLOWED), |
203 | 0 | p, plen); |
204 | |
|
205 | 0 | if (rc != IDN2_OK) |
206 | 0 | goto out; |
207 | 0 | } |
208 | | |
209 | 0 | dst[0] = 'x'; |
210 | 0 | dst[1] = 'n'; |
211 | 0 | dst[2] = '-'; |
212 | 0 | dst[3] = '-'; |
213 | |
|
214 | 0 | tmpl = *dstlen - 4; |
215 | 0 | rc = _idn2_punycode_encode_internal (plen, p, &tmpl, (char *) dst + 4); |
216 | 0 | if (rc != IDN2_OK) |
217 | 0 | goto out; |
218 | | |
219 | | |
220 | 0 | *dstlen = 4 + tmpl; |
221 | |
|
222 | 0 | if (check_roundtrip) |
223 | 0 | { |
224 | 0 | if (srclen_org != *dstlen |
225 | 0 | || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org)) |
226 | 0 | { |
227 | 0 | rc = IDN2_ALABEL_ROUNDTRIP_FAILED; |
228 | 0 | goto out; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP)) |
232 | 0 | { |
233 | 0 | rc = |
234 | 0 | _idn2_punycode_decode_internal (*dstlen - 4, (char *) dst + 4, |
235 | 0 | &label32_len, label_u32); |
236 | 0 | if (rc) |
237 | 0 | { |
238 | 0 | rc = IDN2_ALABEL_ROUNDTRIP_FAILED; |
239 | 0 | goto out; |
240 | 0 | } |
241 | | |
242 | 0 | if (plen != label32_len || u32_cmp (p, label_u32, label32_len)) |
243 | 0 | { |
244 | 0 | rc = IDN2_ALABEL_ROUNDTRIP_FAILED; |
245 | 0 | goto out; |
246 | 0 | } |
247 | 0 | } |
248 | | |
249 | 0 | rc = IDN2_OK; |
250 | |
|
251 | 0 | out: |
252 | 0 | free (p); |
253 | 0 | free (src_allocated); |
254 | 0 | return rc; |
255 | 0 | } |
256 | | |
257 | | #define TR46_TRANSITIONAL_CHECK \ |
258 | 0 | (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL) |
259 | | #define TR46_NONTRANSITIONAL_CHECK \ |
260 | 0 | (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL) |
261 | | |
262 | | static int |
263 | | _tr46 (const uint8_t * domain_u8, uint8_t ** out, int flags) |
264 | 0 | { |
265 | 0 | size_t len, it; |
266 | 0 | uint32_t *domain_u32; |
267 | 0 | int err = IDN2_OK, rc; |
268 | 0 | int transitional = 0; |
269 | 0 | int test_flags; |
270 | |
|
271 | 0 | if (flags & IDN2_TRANSITIONAL) |
272 | 0 | transitional = 1; |
273 | | |
274 | | /* convert UTF-8 to UTF-32 */ |
275 | 0 | if (!(domain_u32 = |
276 | 0 | u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len))) |
277 | 0 | { |
278 | 0 | if (errno == ENOMEM) |
279 | 0 | return IDN2_MALLOC; |
280 | 0 | return IDN2_ENCODING_ERROR; |
281 | 0 | } |
282 | | |
283 | 0 | size_t len2 = 0; |
284 | 0 | for (it = 0; it < len - 1; it++) |
285 | 0 | { |
286 | 0 | IDNAMap map; |
287 | |
|
288 | 0 | get_idna_map (domain_u32[it], &map); |
289 | |
|
290 | 0 | if (map_is (&map, TR46_FLG_DISALLOWED)) |
291 | 0 | { |
292 | 0 | if (domain_u32[it]) |
293 | 0 | { |
294 | 0 | free (domain_u32); |
295 | 0 | return IDN2_DISALLOWED; |
296 | 0 | } |
297 | 0 | len2++; |
298 | 0 | } |
299 | 0 | else if (map_is (&map, TR46_FLG_MAPPED)) |
300 | 0 | { |
301 | 0 | len2 += map.nmappings; |
302 | 0 | } |
303 | 0 | else if (map_is (&map, TR46_FLG_VALID)) |
304 | 0 | { |
305 | 0 | len2++; |
306 | 0 | } |
307 | 0 | else if (map_is (&map, TR46_FLG_IGNORED)) |
308 | 0 | { |
309 | 0 | continue; |
310 | 0 | } |
311 | 0 | else if (map_is (&map, TR46_FLG_DEVIATION)) |
312 | 0 | { |
313 | 0 | if (transitional) |
314 | 0 | { |
315 | 0 | len2 += map.nmappings; |
316 | 0 | } |
317 | 0 | else |
318 | 0 | len2++; |
319 | 0 | } |
320 | 0 | else if (!(flags & IDN2_USE_STD3_ASCII_RULES)) |
321 | 0 | { |
322 | 0 | if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID)) |
323 | 0 | { |
324 | | /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */ |
325 | 0 | len2++; |
326 | 0 | } |
327 | 0 | else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)) |
328 | 0 | { |
329 | | /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */ |
330 | 0 | len2 += map.nmappings; |
331 | 0 | } |
332 | 0 | } |
333 | 0 | } |
334 | | |
335 | | /* Exit early if result is too long. |
336 | | * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */ |
337 | 0 | if (len2 >= IDN2_DOMAIN_MAX_LENGTH) |
338 | 0 | { |
339 | 0 | free (domain_u32); |
340 | 0 | return IDN2_TOO_BIG_DOMAIN; |
341 | 0 | } |
342 | | |
343 | 0 | uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t)); |
344 | 0 | if (!tmp) |
345 | 0 | { |
346 | 0 | free (domain_u32); |
347 | 0 | return IDN2_MALLOC; |
348 | 0 | } |
349 | | |
350 | 0 | len2 = 0; |
351 | 0 | for (it = 0; it < len - 1; it++) |
352 | 0 | { |
353 | 0 | uint32_t c = domain_u32[it]; |
354 | 0 | IDNAMap map; |
355 | |
|
356 | 0 | get_idna_map (c, &map); |
357 | |
|
358 | 0 | if (map_is (&map, TR46_FLG_DISALLOWED)) |
359 | 0 | { |
360 | 0 | tmp[len2++] = c; |
361 | 0 | } |
362 | 0 | else if (map_is (&map, TR46_FLG_MAPPED)) |
363 | 0 | { |
364 | 0 | len2 += get_map_data (tmp + len2, &map); |
365 | 0 | } |
366 | 0 | else if (map_is (&map, TR46_FLG_VALID)) |
367 | 0 | { |
368 | 0 | tmp[len2++] = c; |
369 | 0 | } |
370 | 0 | else if (map_is (&map, TR46_FLG_IGNORED)) |
371 | 0 | { |
372 | 0 | continue; |
373 | 0 | } |
374 | 0 | else if (map_is (&map, TR46_FLG_DEVIATION)) |
375 | 0 | { |
376 | 0 | if (transitional) |
377 | 0 | { |
378 | 0 | len2 += get_map_data (tmp + len2, &map); |
379 | 0 | } |
380 | 0 | else |
381 | 0 | tmp[len2++] = c; |
382 | 0 | } |
383 | 0 | else if (!(flags & IDN2_USE_STD3_ASCII_RULES)) |
384 | 0 | { |
385 | 0 | if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID)) |
386 | 0 | { |
387 | 0 | tmp[len2++] = c; |
388 | 0 | } |
389 | 0 | else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)) |
390 | 0 | { |
391 | 0 | len2 += get_map_data (tmp + len2, &map); |
392 | 0 | } |
393 | 0 | } |
394 | 0 | } |
395 | 0 | free (domain_u32); |
396 | | |
397 | | /* Normalize to NFC */ |
398 | 0 | tmp[len2] = 0; |
399 | 0 | domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len); |
400 | 0 | free (tmp); |
401 | 0 | tmp = NULL; |
402 | |
|
403 | 0 | if (!domain_u32) |
404 | 0 | { |
405 | 0 | if (errno == ENOMEM) |
406 | 0 | return IDN2_MALLOC; |
407 | 0 | return IDN2_ENCODING_ERROR; |
408 | 0 | } |
409 | | |
410 | | /* split into labels and check */ |
411 | 0 | uint32_t *e, *s; |
412 | 0 | for (e = s = domain_u32; *e; s = e) |
413 | 0 | { |
414 | 0 | while (*e && *e != '.') |
415 | 0 | e++; |
416 | |
|
417 | 0 | if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-' |
418 | 0 | && s[3] == '-') |
419 | 0 | { |
420 | | /* decode punycode and check result non-transitional */ |
421 | 0 | size_t ace_len; |
422 | 0 | uint32_t name_u32[IDN2_LABEL_MAX_LENGTH]; |
423 | 0 | size_t name_len = IDN2_LABEL_MAX_LENGTH; |
424 | 0 | uint8_t *ace; |
425 | |
|
426 | 0 | ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len); |
427 | 0 | if (!ace) |
428 | 0 | { |
429 | 0 | free (domain_u32); |
430 | 0 | if (errno == ENOMEM) |
431 | 0 | return IDN2_MALLOC; |
432 | 0 | return IDN2_ENCODING_ERROR; |
433 | 0 | } |
434 | | |
435 | 0 | rc = _idn2_punycode_decode_internal (ace_len, (char *) ace, |
436 | 0 | &name_len, name_u32); |
437 | |
|
438 | 0 | free (ace); |
439 | |
|
440 | 0 | if (rc) |
441 | 0 | { |
442 | 0 | free (domain_u32); |
443 | 0 | return rc; |
444 | 0 | } |
445 | | |
446 | 0 | test_flags = TR46_NONTRANSITIONAL_CHECK; |
447 | |
|
448 | 0 | if (!(flags & IDN2_USE_STD3_ASCII_RULES)) |
449 | 0 | test_flags |= TEST_ALLOW_STD3_DISALLOWED; |
450 | |
|
451 | 0 | if ((rc = _idn2_label_test (test_flags, name_u32, name_len))) |
452 | 0 | err = rc; |
453 | 0 | } |
454 | 0 | else |
455 | 0 | { |
456 | 0 | test_flags = |
457 | 0 | transitional ? TR46_TRANSITIONAL_CHECK : |
458 | 0 | TR46_NONTRANSITIONAL_CHECK; |
459 | |
|
460 | 0 | if (!(flags & IDN2_USE_STD3_ASCII_RULES)) |
461 | 0 | test_flags |= TEST_ALLOW_STD3_DISALLOWED; |
462 | |
|
463 | 0 | if ((rc = _idn2_label_test (test_flags, s, e - s))) |
464 | 0 | err = rc; |
465 | 0 | } |
466 | | |
467 | 0 | if (*e) |
468 | 0 | e++; |
469 | 0 | } |
470 | | |
471 | 0 | if (err == IDN2_OK && out) |
472 | 0 | { |
473 | 0 | uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len); |
474 | 0 | free (domain_u32); |
475 | |
|
476 | 0 | if (!_out) |
477 | 0 | { |
478 | 0 | if (errno == ENOMEM) |
479 | 0 | return IDN2_MALLOC; |
480 | 0 | return IDN2_ENCODING_ERROR; |
481 | 0 | } |
482 | | |
483 | 0 | *out = _out; |
484 | 0 | } |
485 | 0 | else |
486 | 0 | free (domain_u32); |
487 | | |
488 | 0 | return err; |
489 | 0 | } |
490 | | |
491 | | /** |
492 | | * idn2_lookup_u8: |
493 | | * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form. |
494 | | * @lookupname: newly allocated output variable with name to lookup in DNS. |
495 | | * @flags: optional #idn2_flags to modify behaviour. |
496 | | * |
497 | | * Perform IDNA2008 lookup string conversion on domain name @src, as |
498 | | * described in section 5 of RFC 5891. Note that the input string |
499 | | * must be encoded in UTF-8 and be in Unicode NFC form. |
500 | | * |
501 | | * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before |
502 | | * further processing. %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL |
503 | | * do already imply %IDN2_NFC_INPUT. |
504 | | * |
505 | | * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to |
506 | | * convert any input A-labels to U-labels and perform additional |
507 | | * testing. This is default since version 2.2. |
508 | | * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP |
509 | | * |
510 | | * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 |
511 | | * transitional processing, and %IDN2_NONTRANSITIONAL to enable |
512 | | * Unicode TR46 non-transitional processing. |
513 | | * |
514 | | * Multiple flags may be specified by binary or:ing them together. |
515 | | * |
516 | | * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default. |
517 | | * Previously we were eliminating non-STD3 characters from domain strings |
518 | | * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2 |
519 | | * functions. That was an unexpected regression for applications switching |
520 | | * from libidn and thus it is no longer applied by default. |
521 | | * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again. |
522 | | * |
523 | | * After version 0.11: @lookupname may be NULL to test lookup of @src |
524 | | * without allocating memory. |
525 | | * |
526 | | * Returns: On successful conversion %IDN2_OK is returned, if the |
527 | | * output domain or any label would have been too long |
528 | | * %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or |
529 | | * another error code is returned. |
530 | | * |
531 | | * Since: 0.1 |
532 | | **/ |
533 | | int |
534 | | idn2_lookup_u8 (const uint8_t * src, uint8_t ** lookupname, int flags) |
535 | 0 | { |
536 | 0 | size_t lookupnamelen = 0; |
537 | 0 | uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1]; |
538 | 0 | uint8_t *src_allocated = NULL; |
539 | 0 | int rc; |
540 | |
|
541 | 0 | if (src == NULL) |
542 | 0 | { |
543 | 0 | if (lookupname) |
544 | 0 | *lookupname = NULL; |
545 | 0 | return IDN2_OK; |
546 | 0 | } |
547 | | |
548 | 0 | rc = set_default_flags (&flags); |
549 | 0 | if (rc != IDN2_OK) |
550 | 0 | return rc; |
551 | | |
552 | 0 | if (!(flags & IDN2_NO_TR46)) |
553 | 0 | { |
554 | 0 | uint8_t *out; |
555 | |
|
556 | 0 | rc = _tr46 (src, &out, flags); |
557 | 0 | if (rc != IDN2_OK) |
558 | 0 | return rc; |
559 | | |
560 | 0 | src = src_allocated = out; |
561 | 0 | } |
562 | | |
563 | 0 | do |
564 | 0 | { |
565 | 0 | const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.'); |
566 | | /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E |
567 | | and U+FF61 here? Perhaps when IDN2_NFC_INPUT? */ |
568 | 0 | size_t labellen = end - src; |
569 | 0 | uint8_t tmp[IDN2_LABEL_MAX_LENGTH]; |
570 | 0 | size_t tmplen = IDN2_LABEL_MAX_LENGTH; |
571 | |
|
572 | 0 | rc = label (src, labellen, tmp, &tmplen, flags); |
573 | 0 | if (rc != IDN2_OK) |
574 | 0 | { |
575 | 0 | free (src_allocated); |
576 | 0 | return rc; |
577 | 0 | } |
578 | | |
579 | 0 | if (lookupnamelen + tmplen |
580 | 0 | > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2)) |
581 | 0 | { |
582 | 0 | free (src_allocated); |
583 | 0 | return IDN2_TOO_BIG_DOMAIN; |
584 | 0 | } |
585 | | |
586 | 0 | memcpy (_lookupname + lookupnamelen, tmp, tmplen); |
587 | 0 | lookupnamelen += tmplen; |
588 | |
|
589 | 0 | if (*end == '.') |
590 | 0 | { |
591 | 0 | if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH) |
592 | 0 | { |
593 | 0 | free (src_allocated); |
594 | 0 | return IDN2_TOO_BIG_DOMAIN; |
595 | 0 | } |
596 | | |
597 | 0 | _lookupname[lookupnamelen] = '.'; |
598 | 0 | lookupnamelen++; |
599 | 0 | } |
600 | 0 | _lookupname[lookupnamelen] = '\0'; |
601 | |
|
602 | 0 | src = end; |
603 | 0 | } |
604 | 0 | while (*src++); |
605 | | |
606 | 0 | free (src_allocated); |
607 | |
|
608 | 0 | if (lookupname) |
609 | 0 | { |
610 | 0 | uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1); |
611 | |
|
612 | 0 | if (tmp == NULL) |
613 | 0 | return IDN2_MALLOC; |
614 | | |
615 | 0 | memcpy (tmp, _lookupname, lookupnamelen + 1); |
616 | 0 | *lookupname = tmp; |
617 | 0 | } |
618 | | |
619 | 0 | return IDN2_OK; |
620 | 0 | } |
621 | | |
622 | | /** |
623 | | * idn2_lookup_ul: |
624 | | * @src: input zero-terminated locale encoded string. |
625 | | * @lookupname: newly allocated output variable with name to lookup in DNS. |
626 | | * @flags: optional #idn2_flags to modify behaviour. |
627 | | * |
628 | | * Perform IDNA2008 lookup string conversion on domain name @src, as |
629 | | * described in section 5 of RFC 5891. Note that the input is assumed |
630 | | * to be encoded in the locale's default coding system, and will be |
631 | | * transcoded to UTF-8 and NFC normalized by this function. |
632 | | * |
633 | | * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to |
634 | | * convert any input A-labels to U-labels and perform additional |
635 | | * testing. This is default since version 2.2. |
636 | | * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP |
637 | | * |
638 | | * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing, |
639 | | * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional |
640 | | * processing. |
641 | | * |
642 | | * Multiple flags may be specified by binary or:ing them together, for |
643 | | * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL. |
644 | | * |
645 | | * The %IDN2_NFC_INPUT in @flags is always enabled in this function. |
646 | | * |
647 | | * After version 0.11: @lookupname may be NULL to test lookup of @src |
648 | | * without allocating memory. |
649 | | * |
650 | | * Returns: On successful conversion %IDN2_OK is returned, if |
651 | | * conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is |
652 | | * returned, if the output domain or any label would have been too |
653 | | * long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or |
654 | | * another error code is returned. |
655 | | * |
656 | | * Since: 0.1 |
657 | | **/ |
658 | | int |
659 | | idn2_lookup_ul (const char *src, char **lookupname, int flags) |
660 | 0 | { |
661 | 0 | uint8_t *utf8src = NULL; |
662 | 0 | int rc; |
663 | |
|
664 | 0 | if (src) |
665 | 0 | { |
666 | 0 | const char *encoding = locale_charset (); |
667 | |
|
668 | 0 | utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error); |
669 | |
|
670 | 0 | if (!utf8src) |
671 | 0 | { |
672 | 0 | if (errno == ENOMEM) |
673 | 0 | return IDN2_MALLOC; |
674 | 0 | return IDN2_ICONV_FAIL; |
675 | 0 | } |
676 | 0 | } |
677 | | |
678 | 0 | rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname, |
679 | 0 | flags | IDN2_NFC_INPUT); |
680 | |
|
681 | 0 | free (utf8src); |
682 | |
|
683 | 0 | return rc; |
684 | 0 | } |
685 | | |
686 | | /** |
687 | | * idn2_to_ascii_4i: |
688 | | * @input: zero terminated input Unicode (UCS-4) string. |
689 | | * @inlen: number of elements in @input. |
690 | | * @output: output zero terminated string that must have room for at least 63 characters plus the terminating zero. |
691 | | * @flags: optional #idn2_flags to modify behaviour. |
692 | | * |
693 | | * THIS FUNCTION HAS BEEN DEPRECATED DUE TO A DESIGN FLAW. USE idn2_to_ascii_4i2() INSTEAD ! |
694 | | * |
695 | | * The ToASCII operation takes a sequence of Unicode code points that make |
696 | | * up one domain label and transforms it into a sequence of code points in |
697 | | * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and |
698 | | * the resulting sequence are equivalent labels. |
699 | | * |
700 | | * It is important to note that the ToASCII operation can fail. |
701 | | * ToASCII fails if any step of it fails. If any step of the |
702 | | * ToASCII operation fails on any label in a domain name, that domain |
703 | | * name MUST NOT be used as an internationalized domain name. |
704 | | * The method for dealing with this failure is application-specific. |
705 | | * |
706 | | * The inputs to ToASCII are a sequence of code points. |
707 | | * |
708 | | * ToASCII never alters a sequence of code points that are all in the ASCII |
709 | | * range to begin with (although it could fail). Applying the ToASCII operation multiple |
710 | | * effect as applying it just once. |
711 | | * |
712 | | * The default behavior of this function (when flags are zero) is to apply |
713 | | * the IDNA2008 rules without the TR46 amendments. As the TR46 |
714 | | * non-transitional processing is nowadays ubiquitous, when unsure, it is |
715 | | * recommended to call this function with the %IDN2_NONTRANSITIONAL |
716 | | * and the %IDN2_NFC_INPUT flags for compatibility with other software. |
717 | | * |
718 | | * Return value: Returns %IDN2_OK on success, or error code. |
719 | | * |
720 | | * Since: 2.0.0 |
721 | | * |
722 | | * Deprecated: 2.1.1: Use idn2_to_ascii_4i2(). |
723 | | **/ |
724 | | int |
725 | | idn2_to_ascii_4i (const uint32_t * input, size_t inlen, char *output, |
726 | | int flags) |
727 | 0 | { |
728 | 0 | char *out; |
729 | 0 | int rc; |
730 | |
|
731 | 0 | if (!input) |
732 | 0 | { |
733 | 0 | if (output) |
734 | 0 | *output = 0; |
735 | 0 | return IDN2_OK; |
736 | 0 | } |
737 | | |
738 | 0 | rc = idn2_to_ascii_4i2 (input, inlen, &out, flags); |
739 | 0 | if (rc == IDN2_OK) |
740 | 0 | { |
741 | 0 | size_t len = strlen (out); |
742 | |
|
743 | 0 | if (len > 63) |
744 | 0 | rc = IDN2_TOO_BIG_DOMAIN; |
745 | 0 | else if (output) |
746 | 0 | memcpy (output, out, len); |
747 | |
|
748 | 0 | free (out); |
749 | 0 | } |
750 | |
|
751 | 0 | return rc; |
752 | 0 | } |
753 | | |
754 | | /** |
755 | | * idn2_to_ascii_4i2: |
756 | | * @input: zero terminated input Unicode (UCS-4) string. |
757 | | * @inlen: number of elements in @input. |
758 | | * @output: pointer to newly allocated zero-terminated output string. |
759 | | * @flags: optional #idn2_flags to modify behaviour. |
760 | | * |
761 | | * The ToASCII operation takes a sequence of Unicode code points that make |
762 | | * up one domain label and transforms it into a sequence of code points in |
763 | | * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and |
764 | | * the resulting sequence are equivalent labels. |
765 | | * |
766 | | * It is important to note that the ToASCII operation can fail. |
767 | | * ToASCII fails if any step of it fails. If any step of the |
768 | | * ToASCII operation fails on any label in a domain name, that domain |
769 | | * name MUST NOT be used as an internationalized domain name. |
770 | | * The method for dealing with this failure is application-specific. |
771 | | * |
772 | | * The inputs to ToASCII are a sequence of code points. |
773 | | * |
774 | | * ToASCII never alters a sequence of code points that are all in the ASCII |
775 | | * range to begin with (although it could fail). Applying the ToASCII operation multiple |
776 | | * effect as applying it just once. |
777 | | * |
778 | | * The default behavior of this function (when flags are zero) is to apply |
779 | | * the IDNA2008 rules without the TR46 amendments. As the TR46 |
780 | | * non-transitional processing is nowadays ubiquitous, when unsure, it is |
781 | | * recommended to call this function with the %IDN2_NONTRANSITIONAL |
782 | | * and the %IDN2_NFC_INPUT flags for compatibility with other software. |
783 | | * |
784 | | * Return value: Returns %IDN2_OK on success, or error code. |
785 | | * |
786 | | * Since: 2.1.1 |
787 | | **/ |
788 | | int |
789 | | idn2_to_ascii_4i2 (const uint32_t * input, size_t inlen, char **output, |
790 | | int flags) |
791 | 0 | { |
792 | 0 | uint32_t *input_u32; |
793 | 0 | uint8_t *input_u8, *output_u8; |
794 | 0 | size_t length; |
795 | 0 | int rc; |
796 | |
|
797 | 0 | if (!input) |
798 | 0 | { |
799 | 0 | if (output) |
800 | 0 | *output = NULL; |
801 | 0 | return IDN2_OK; |
802 | 0 | } |
803 | | |
804 | 0 | input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t)); |
805 | 0 | if (!input_u32) |
806 | 0 | return IDN2_MALLOC; |
807 | | |
808 | 0 | u32_cpy (input_u32, input, inlen); |
809 | 0 | input_u32[inlen] = 0; |
810 | |
|
811 | 0 | input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length); |
812 | 0 | free (input_u32); |
813 | 0 | if (!input_u8) |
814 | 0 | { |
815 | 0 | if (errno == ENOMEM) |
816 | 0 | return IDN2_MALLOC; |
817 | 0 | return IDN2_ENCODING_ERROR; |
818 | 0 | } |
819 | | |
820 | 0 | rc = idn2_lookup_u8 (input_u8, &output_u8, flags); |
821 | 0 | free (input_u8); |
822 | |
|
823 | 0 | if (rc == IDN2_OK) |
824 | 0 | { |
825 | 0 | if (output) |
826 | 0 | *output = (char *) output_u8; |
827 | 0 | else |
828 | 0 | free (output_u8); |
829 | 0 | } |
830 | |
|
831 | 0 | return rc; |
832 | 0 | } |
833 | | |
834 | | /** |
835 | | * idn2_to_ascii_4z: |
836 | | * @input: zero terminated input Unicode (UCS-4) string. |
837 | | * @output: pointer to newly allocated zero-terminated output string. |
838 | | * @flags: optional #idn2_flags to modify behaviour. |
839 | | * |
840 | | * Convert UCS-4 domain name to ASCII string using the IDNA2008 |
841 | | * rules. The domain name may contain several labels, separated by dots. |
842 | | * The output buffer must be deallocated by the caller. |
843 | | * |
844 | | * The default behavior of this function (when flags are zero) is to apply |
845 | | * the IDNA2008 rules without the TR46 amendments. As the TR46 |
846 | | * non-transitional processing is nowadays ubiquitous, when unsure, it is |
847 | | * recommended to call this function with the %IDN2_NONTRANSITIONAL |
848 | | * and the %IDN2_NFC_INPUT flags for compatibility with other software. |
849 | | * |
850 | | * Return value: Returns %IDN2_OK on success, or error code. |
851 | | * |
852 | | * Since: 2.0.0 |
853 | | **/ |
854 | | int |
855 | | idn2_to_ascii_4z (const uint32_t * input, char **output, int flags) |
856 | 0 | { |
857 | 0 | uint8_t *input_u8; |
858 | 0 | size_t length; |
859 | 0 | int rc; |
860 | |
|
861 | 0 | if (!input) |
862 | 0 | { |
863 | 0 | if (output) |
864 | 0 | *output = NULL; |
865 | 0 | return IDN2_OK; |
866 | 0 | } |
867 | | |
868 | 0 | input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length); |
869 | 0 | if (!input_u8) |
870 | 0 | { |
871 | 0 | if (errno == ENOMEM) |
872 | 0 | return IDN2_MALLOC; |
873 | 0 | return IDN2_ENCODING_ERROR; |
874 | 0 | } |
875 | | |
876 | 0 | rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags); |
877 | 0 | free (input_u8); |
878 | |
|
879 | 0 | return rc; |
880 | 0 | } |
881 | | |
/**
 * idn2_to_ascii_8z:
 * @input: zero terminated input UTF-8 string.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * This is idn2_lookup_u8() with plain char pointers.
 *
 * When neither %IDN2_NO_TR46 nor %IDN2_TRANSITIONAL is set in @flags (for
 * example when @flags is zero), TR46 non-transitional processing is
 * applied, as if %IDN2_NONTRANSITIONAL had been passed.  Pass
 * %IDN2_NO_TR46 to skip the TR46 processing step.  When unsure, it is
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
 *
 * Return value: Returns %IDN2_OK on success, or error code.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  const uint8_t *input_u8 = (const uint8_t *) input;
  uint8_t **output_u8 = (uint8_t **) output;

  return idn2_lookup_u8 (input_u8, output_u8, flags);
}
907 | | |
/**
 * idn2_to_ascii_lz:
 * @input: zero terminated input string, encoded in the locale's charset.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert a domain name in locale's encoding to ASCII string using the
 * IDNA2008 rules.  The domain name may contain several labels, separated
 * by dots.  The output buffer must be deallocated by the caller.
 *
 * This is another name for idn2_lookup_ul(), which always enables
 * %IDN2_NFC_INPUT.
 *
 * When neither %IDN2_NO_TR46 nor %IDN2_TRANSITIONAL is set in @flags (for
 * example when @flags is zero), TR46 non-transitional processing is
 * applied, as if %IDN2_NONTRANSITIONAL had been passed.  Pass
 * %IDN2_NO_TR46 to skip the TR46 processing step.
 *
 * Returns: %IDN2_OK on success, or error code.
 * Same as described in idn2_lookup_ul() documentation.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  int rc = idn2_lookup_ul (input, output, flags);

  return rc;
}