Coverage Report

Created: 2025-11-16 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/lookup.c
Line
Count
Source
1
/* lookup.c - implementation of IDNA2008 lookup functions
2
   Copyright (C) 2011-2025 Simon Josefsson
3
   Copyright (C) 2017-2025 Tim Ruehsen
4
5
   Libidn2 is free software: you can redistribute it and/or modify it
6
   under the terms of either:
7
8
     * the GNU Lesser General Public License as published by the Free
9
       Software Foundation; either version 3 of the License, or (at
10
       your option) any later version.
11
12
   or
13
14
     * the GNU General Public License as published by the Free
15
       Software Foundation; either version 2 of the License, or (at
16
       your option) any later version.
17
18
   or both in parallel, as here.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License for more details.
24
25
   You should have received copies of the GNU General Public License and
26
   the GNU Lesser General Public License along with this program.  If
27
   not, see <http://www.gnu.org/licenses/>.
28
*/
29
30
#include <config.h>
31
32
#include "idn2.h"
33
34
#include <errno.h>    /* errno */
35
#include <stdlib.h>   /* malloc, free */
36
37
#include <unitypes.h>
38
#include <uniconv.h>    /* u8_strconv_from_locale */
39
#include <uninorm.h>    /* u32_normalize */
40
#include <unistr.h>   /* u8_to_u32 */
41
42
#include "idna.h"   /* _idn2_label_test */
43
#include "tr46map.h"    /* definition for tr46map.c */
44
45
#ifdef HAVE_LIBUNISTRING
46
/* copied from gnulib */
47
# include <limits.h>
48
# define _C_CTYPE_LOWER_N(N) \
49
0
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
50
0
   case 'e' + (N): case 'f' + (N): \
51
0
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
52
0
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
53
0
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
54
0
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
55
0
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
56
0
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')
57
static inline int
58
c_tolower (int c)
59
206k
{
60
206k
  switch (c)
61
206k
    {
62
0
    _C_CTYPE_UPPER:
63
0
      return c - 'A' + 'a';
64
206k
    default:
65
206k
      return c;
66
206k
    }
67
206k
}
68
69
static int
70
c_strncasecmp (const char *s1, const char *s2, size_t n)
71
7.52k
{
72
7.52k
  register const unsigned char *p1 = (const unsigned char *) s1;
73
7.52k
  register const unsigned char *p2 = (const unsigned char *) s2;
74
7.52k
  unsigned char c1, c2;
75
76
7.52k
  if (p1 == p2 || n == 0)
77
0
    return 0;
78
79
7.52k
  do
80
103k
    {
81
103k
      c1 = c_tolower (*p1);
82
103k
      c2 = c_tolower (*p2);
83
84
103k
      if (--n == 0 || c1 == '\0')
85
7.52k
  break;
86
87
95.8k
      ++p1;
88
95.8k
      ++p2;
89
95.8k
    }
90
95.8k
  while (c1 == c2);
91
92
7.52k
  if (UCHAR_MAX <= INT_MAX)
93
7.52k
    return c1 - c2;
94
0
  else
95
    /* On machines where 'char' and 'int' are types of the same size, the
96
       difference of two 'unsigned char' values - including the sign bit -
97
       doesn't fit in an 'int'.  */
98
0
    return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
99
7.52k
}
100
#else
101
# include <c-strcase.h>
102
#endif
103
104
static int
105
set_default_flags (int *flags)
106
111k
{
107
111k
  if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
108
0
    return IDN2_INVALID_FLAGS;
109
110
111k
  if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
111
111k
      && ((*flags) & IDN2_NO_TR46))
112
0
    return IDN2_INVALID_FLAGS;
113
114
111k
  if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
115
0
      && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
116
0
    return IDN2_INVALID_FLAGS;
117
118
111k
  if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
119
60.2k
    *flags |= IDN2_NONTRANSITIONAL;
120
121
111k
  return IDN2_OK;
122
111k
}
123
124
static int
125
label (const uint8_t *src, size_t srclen, uint8_t *dst, size_t *dstlen,
126
       int flags)
127
71.1k
{
128
71.1k
  size_t plen;
129
71.1k
  uint32_t *p = NULL;
130
71.1k
  const uint8_t *src_org = NULL;
131
71.1k
  uint8_t *src_allocated = NULL;
132
71.1k
  int rc, check_roundtrip = 0;
133
71.1k
  size_t tmpl, srclen_org = 0;
134
71.1k
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
135
71.1k
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;
136
137
71.1k
  if (_idn2_ascii_p (src, srclen))
138
19.0k
    {
139
19.0k
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
140
13.2k
    && memcmp (src, "xn--", 4) == 0)
141
11.2k
  {
142
    /*
143
       If the input to this procedure appears to be an A-label
144
       (i.e., it starts in "xn--", interpreted
145
       case-insensitively), the lookup application MAY attempt to
146
       convert it to a U-label, first ensuring that the A-label is
147
       entirely in lowercase (converting it to lowercase if
148
       necessary), and apply the tests of Section 5.4 and the
149
       conversion of Section 5.5 to that form. */
150
11.2k
    rc = idn2_punycode_decode ((char *) src + 4, srclen - 4,
151
11.2k
             label_u32, &label32_len);
152
11.2k
    if (rc)
153
0
      return rc;
154
155
11.2k
    check_roundtrip = 1;
156
11.2k
    src_org = src;
157
11.2k
    srclen_org = srclen;
158
159
11.2k
    srclen = IDN2_LABEL_MAX_LENGTH;
160
11.2k
    src = src_allocated =
161
11.2k
      u32_to_u8 (label_u32, label32_len, NULL, &srclen);
162
11.2k
    if (!src)
163
0
      {
164
0
        if (errno == ENOMEM)
165
0
    return IDN2_MALLOC;
166
0
        return IDN2_ENCODING_ERROR;
167
0
      }
168
11.2k
  }
169
7.78k
      else
170
7.78k
  {
171
7.78k
    if (srclen > IDN2_LABEL_MAX_LENGTH)
172
1.00k
      return IDN2_TOO_BIG_LABEL;
173
6.78k
    if (srclen > *dstlen)
174
0
      return IDN2_TOO_BIG_DOMAIN;
175
176
6.78k
    memcpy (dst, src, srclen);
177
6.78k
    *dstlen = srclen;
178
6.78k
    return IDN2_OK;
179
6.78k
  }
180
19.0k
    }
181
182
63.3k
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
183
63.3k
  if (rc != IDN2_OK)
184
0
    goto out;
185
186
63.3k
  if (!(flags & IDN2_TRANSITIONAL))
187
34.4k
    {
188
34.4k
      rc = _idn2_label_test (TEST_NFC |
189
34.4k
           TEST_2HYPHEN |
190
34.4k
           TEST_LEADING_COMBINING |
191
34.4k
           TEST_DISALLOWED |
192
34.4k
           TEST_CONTEXTJ_RULE |
193
34.4k
           TEST_CONTEXTO_WITH_RULE |
194
34.4k
           TEST_UNASSIGNED | TEST_BIDI |
195
34.4k
           ((flags & IDN2_NONTRANSITIONAL) ?
196
34.4k
            TEST_NONTRANSITIONAL : 0) | ((flags &
197
34.4k
                  IDN2_USE_STD3_ASCII_RULES)
198
34.4k
                 ? 0 :
199
34.4k
                 TEST_ALLOW_STD3_DISALLOWED),
200
34.4k
           p, plen);
201
202
34.4k
      if (rc != IDN2_OK)
203
14.9k
  goto out;
204
34.4k
    }
205
206
48.4k
  dst[0] = 'x';
207
48.4k
  dst[1] = 'n';
208
48.4k
  dst[2] = '-';
209
48.4k
  dst[3] = '-';
210
211
48.4k
  tmpl = *dstlen - 4;
212
48.4k
  rc = idn2_punycode_encode (p, plen, (char *) dst + 4, &tmpl);
213
48.4k
  if (rc != IDN2_OK)
214
2.58k
    goto out;
215
216
217
45.8k
  *dstlen = 4 + tmpl;
218
219
45.8k
  if (check_roundtrip)
220
7.52k
    {
221
7.52k
      if (srclen_org != *dstlen
222
7.52k
    || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
223
0
  {
224
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
225
0
    goto out;
226
0
  }
227
7.52k
    }
228
38.3k
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
229
38.3k
    {
230
38.3k
      rc = idn2_punycode_decode ((char *) dst + 4, *dstlen - 4,
231
38.3k
         label_u32, &label32_len);
232
38.3k
      if (rc)
233
0
  {
234
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
235
0
    goto out;
236
0
  }
237
238
38.3k
      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
239
0
  {
240
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
241
0
    goto out;
242
0
  }
243
38.3k
    }
244
245
45.8k
  rc = IDN2_OK;
246
247
63.3k
out:
248
63.3k
  free (p);
249
63.3k
  free (src_allocated);
250
63.3k
  return rc;
251
45.8k
}
252
253
#define TR46_TRANSITIONAL_CHECK \
254
32.1k
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
255
#define TR46_NONTRANSITIONAL_CHECK \
256
131k
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
257
258
static int
259
_tr46 (const uint8_t *domain_u8, uint8_t **out, int flags)
260
111k
{
261
111k
  size_t len, it;
262
111k
  uint32_t *domain_u32;
263
111k
  int err = IDN2_OK, rc;
264
111k
  int transitional = 0;
265
111k
  int test_flags;
266
267
111k
  if (flags & IDN2_TRANSITIONAL)
268
51.4k
    transitional = 1;
269
270
  /* convert UTF-8 to UTF-32 */
271
111k
  if (!(domain_u32 =
272
111k
  u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
273
17.0k
    {
274
17.0k
      if (errno == ENOMEM)
275
0
  return IDN2_MALLOC;
276
17.0k
      return IDN2_ENCODING_ERROR;
277
17.0k
    }
278
279
94.6k
  size_t len2 = 0;
280
1.49M
  for (it = 0; it < len - 1; it++)
281
1.43M
    {
282
1.43M
      IDNAMap map;
283
284
1.43M
      get_idna_map (domain_u32[it], &map);
285
286
1.43M
      if (map_is (&map, TR46_FLG_DISALLOWED))
287
32.5k
  {
288
32.5k
    if (domain_u32[it])
289
32.5k
      {
290
32.5k
        free (domain_u32);
291
32.5k
        return IDN2_DISALLOWED;
292
32.5k
      }
293
0
    len2++;
294
0
  }
295
1.40M
      else if (map_is (&map, TR46_FLG_MAPPED))
296
384k
  {
297
384k
    len2 += map.nmappings;
298
384k
  }
299
1.01M
      else if (map_is (&map, TR46_FLG_VALID))
300
782k
  {
301
782k
    len2++;
302
782k
  }
303
233k
      else if (map_is (&map, TR46_FLG_IGNORED))
304
20.6k
  {
305
20.6k
    continue;
306
20.6k
  }
307
213k
      else if (map_is (&map, TR46_FLG_DEVIATION))
308
71.7k
  {
309
71.7k
    if (transitional)
310
35.2k
      {
311
35.2k
        len2 += map.nmappings;
312
35.2k
      }
313
36.5k
    else
314
36.5k
      len2++;
315
71.7k
  }
316
141k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
317
0
  {
318
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
319
0
      {
320
        /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
321
0
        len2++;
322
0
      }
323
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
324
0
      {
325
        /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
326
0
        len2 += map.nmappings;
327
0
      }
328
0
  }
329
1.43M
    }
330
331
  /* Exit early if result is too long.
332
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
333
62.0k
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
334
581
    {
335
581
      free (domain_u32);
336
581
      return IDN2_TOO_BIG_DOMAIN;
337
581
    }
338
339
61.4k
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
340
61.4k
  if (!tmp)
341
0
    {
342
0
      free (domain_u32);
343
0
      return IDN2_MALLOC;
344
0
    }
345
346
61.4k
  len2 = 0;
347
924k
  for (it = 0; it < len - 1; it++)
348
863k
    {
349
863k
      uint32_t c = domain_u32[it];
350
863k
      IDNAMap map;
351
352
863k
      get_idna_map (c, &map);
353
354
863k
      if (map_is (&map, TR46_FLG_DISALLOWED))
355
0
  {
356
0
    tmp[len2++] = c;
357
0
  }
358
863k
      else if (map_is (&map, TR46_FLG_MAPPED))
359
219k
  {
360
219k
    len2 += get_map_data (tmp + len2, &map);
361
219k
  }
362
643k
      else if (map_is (&map, TR46_FLG_VALID))
363
489k
  {
364
489k
    tmp[len2++] = c;
365
489k
  }
366
154k
      else if (map_is (&map, TR46_FLG_IGNORED))
367
15.6k
  {
368
15.6k
    continue;
369
15.6k
  }
370
138k
      else if (map_is (&map, TR46_FLG_DEVIATION))
371
36.5k
  {
372
36.5k
    if (transitional)
373
17.5k
      {
374
17.5k
        len2 += get_map_data (tmp + len2, &map);
375
17.5k
      }
376
19.0k
    else
377
19.0k
      tmp[len2++] = c;
378
36.5k
  }
379
102k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
380
0
  {
381
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
382
0
      {
383
0
        tmp[len2++] = c;
384
0
      }
385
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
386
0
      {
387
0
        len2 += get_map_data (tmp + len2, &map);
388
0
      }
389
0
  }
390
863k
    }
391
61.4k
  free (domain_u32);
392
393
  /* Normalize to NFC */
394
61.4k
  tmp[len2] = 0;
395
61.4k
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
396
61.4k
  free (tmp);
397
61.4k
  tmp = NULL;
398
399
61.4k
  if (!domain_u32)
400
0
    {
401
0
      if (errno == ENOMEM)
402
0
  return IDN2_MALLOC;
403
0
      return IDN2_ENCODING_ERROR;
404
0
    }
405
406
  /* split into labels and check */
407
61.4k
  uint32_t *e, *s;
408
152k
  for (e = s = domain_u32; *e; s = e)
409
99.5k
    {
410
1.04M
      while (*e && *e != '.')
411
941k
  e++;
412
413
99.5k
      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
414
27.8k
    && s[3] == '-')
415
26.6k
  {
416
    /* decode punycode and check result non-transitional */
417
26.6k
    size_t ace_len;
418
26.6k
    uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
419
26.6k
    size_t name_len = IDN2_LABEL_MAX_LENGTH;
420
26.6k
    uint8_t *ace;
421
422
26.6k
    ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
423
26.6k
    if (!ace)
424
0
      {
425
0
        free (domain_u32);
426
0
        if (errno == ENOMEM)
427
0
    return IDN2_MALLOC;
428
0
        return IDN2_ENCODING_ERROR;
429
0
      }
430
431
26.6k
    rc = idn2_punycode_decode ((char *) ace, ace_len,
432
26.6k
             name_u32, &name_len);
433
434
26.6k
    free (ace);
435
436
26.6k
    if (rc)
437
8.37k
      {
438
8.37k
        free (domain_u32);
439
8.37k
        return rc;
440
8.37k
      }
441
442
18.2k
    test_flags = TR46_NONTRANSITIONAL_CHECK;
443
444
18.2k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
445
0
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
446
447
18.2k
    if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
448
6.93k
      err = rc;
449
18.2k
  }
450
72.9k
      else
451
72.9k
  {
452
72.9k
    test_flags =
453
72.9k
      transitional ? TR46_TRANSITIONAL_CHECK :
454
72.9k
      TR46_NONTRANSITIONAL_CHECK;
455
456
72.9k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
457
0
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
458
459
72.9k
    if ((rc = _idn2_label_test (test_flags, s, e - s)))
460
6.47k
      err = rc;
461
72.9k
  }
462
463
91.1k
      if (*e)
464
39.5k
  e++;
465
91.1k
    }
466
467
53.1k
  if (err == IDN2_OK && out)
468
42.3k
    {
469
42.3k
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
470
42.3k
      free (domain_u32);
471
472
42.3k
      if (!_out)
473
0
  {
474
0
    if (errno == ENOMEM)
475
0
      return IDN2_MALLOC;
476
0
    return IDN2_ENCODING_ERROR;
477
0
  }
478
479
42.3k
      *out = _out;
480
42.3k
    }
481
10.7k
  else
482
53.1k
    free (domain_u32);
483
484
53.1k
  return err;
485
53.1k
}
486
487
/**
488
 * idn2_lookup_u8:
489
 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
490
 * @lookupname: newly allocated output variable with name to lookup in DNS.
491
 * @flags: optional #idn2_flags to modify behaviour.
492
 *
493
 * Perform IDNA2008 lookup string conversion on domain name @src, as
494
 * described in section 5 of RFC 5891.  Note that the input string
495
 * must be encoded in UTF-8 and be in Unicode NFC form.
496
 *
497
 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
498
 * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
499
 * do already imply %IDN2_NFC_INPUT.
500
 *
501
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
502
 * convert any input A-labels to U-labels and perform additional
503
 * testing. This is default since version 2.2.
504
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
505
 *
506
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
507
 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
508
 * Unicode TR46 non-transitional processing.
509
 *
510
 * Multiple flags may be specified by binary or:ing them together.
511
 *
512
 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
513
 * Previously we were eliminating non-STD3 characters from domain strings
514
 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
515
 * functions. That was an unexpected regression for applications switching
516
 * from libidn and thus it is no longer applied by default.
517
 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
518
 *
519
 * After version 0.11: @lookupname may be NULL to test lookup of @src
520
 * without allocating memory.
521
 *
522
 * Returns: On successful conversion %IDN2_OK is returned, if the
523
 *   output domain or any label would have been too long
524
 *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
525
 *   another error code is returned.
526
 *
527
 * Since: 0.1
528
 **/
529
int
530
idn2_lookup_u8 (const uint8_t *src, uint8_t **lookupname, int flags)
531
111k
{
532
111k
  size_t lookupnamelen = 0;
533
111k
  uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
534
111k
  uint8_t *src_allocated = NULL;
535
111k
  int rc;
536
537
111k
  if (src == NULL)
538
0
    {
539
0
      if (lookupname)
540
0
  *lookupname = NULL;
541
0
      return IDN2_OK;
542
0
    }
543
544
111k
  rc = set_default_flags (&flags);
545
111k
  if (rc != IDN2_OK)
546
0
    return rc;
547
548
111k
  if (!(flags & IDN2_NO_TR46))
549
111k
    {
550
111k
      uint8_t *out = NULL;
551
552
111k
      rc = _tr46 (src, &out, flags);
553
111k
      if (rc != IDN2_OK)
554
69.3k
  return rc;
555
556
42.3k
      src = src_allocated = out;
557
42.3k
    }
558
559
42.3k
  do
560
71.1k
    {
561
71.1k
      const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
562
      /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
563
         and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
564
71.1k
      size_t labellen = end - src;
565
71.1k
      uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
566
71.1k
      size_t tmplen = IDN2_LABEL_MAX_LENGTH;
567
568
71.1k
      rc = label (src, labellen, tmp, &tmplen, flags);
569
71.1k
      if (rc != IDN2_OK)
570
18.5k
  {
571
18.5k
    free (src_allocated);
572
18.5k
    return rc;
573
18.5k
  }
574
575
52.6k
      if (lookupnamelen + tmplen
576
52.6k
    > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
577
1.04k
  {
578
1.04k
    free (src_allocated);
579
1.04k
    return IDN2_TOO_BIG_DOMAIN;
580
1.04k
  }
581
582
51.5k
      memcpy (_lookupname + lookupnamelen, tmp, tmplen);
583
51.5k
      lookupnamelen += tmplen;
584
585
51.5k
      if (*end == '.')
586
28.8k
  {
587
28.8k
    if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
588
0
      {
589
0
        free (src_allocated);
590
0
        return IDN2_TOO_BIG_DOMAIN;
591
0
      }
592
593
28.8k
    _lookupname[lookupnamelen] = '.';
594
28.8k
    lookupnamelen++;
595
28.8k
  }
596
51.5k
      _lookupname[lookupnamelen] = '\0';
597
598
51.5k
      src = end;
599
51.5k
    }
600
51.5k
  while (*src++);
601
602
42.3k
  free (src_allocated);
603
604
22.7k
  if (lookupname)
605
22.7k
    {
606
22.7k
      uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
607
608
22.7k
      if (tmp == NULL)
609
0
  return IDN2_MALLOC;
610
611
22.7k
      memcpy (tmp, _lookupname, lookupnamelen + 1);
612
22.7k
      *lookupname = tmp;
613
22.7k
    }
614
615
22.7k
  return IDN2_OK;
616
22.7k
}
617
618
/**
619
 * idn2_lookup_ul:
620
 * @src: input zero-terminated locale encoded string.
621
 * @lookupname: newly allocated output variable with name to lookup in DNS.
622
 * @flags: optional #idn2_flags to modify behaviour.
623
 *
624
 * Perform IDNA2008 lookup string conversion on domain name @src, as
625
 * described in section 5 of RFC 5891.  Note that the input is assumed
626
 * to be encoded in the locale's default coding system, and will be
627
 * transcoded to UTF-8 and NFC normalized by this function.
628
 *
629
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
630
 * convert any input A-labels to U-labels and perform additional
631
 * testing. This is default since version 2.2.
632
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
633
 *
634
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
635
 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
636
 * processing.
637
 *
638
 * Multiple flags may be specified by binary or:ing them together, for
639
 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
640
 *
641
 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
642
 *
643
 * After version 0.11: @lookupname may be NULL to test lookup of @src
644
 * without allocating memory.
645
 *
646
 * Returns: On successful conversion %IDN2_OK is returned, if
647
 *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
648
 *   returned, if the output domain or any label would have been too
649
 *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
650
 *   another error code is returned.
651
 *
652
 * Since: 0.1
653
 **/
654
int
655
idn2_lookup_ul (const char *src, char **lookupname, int flags)
656
0
{
657
0
  uint8_t *utf8src = NULL;
658
0
  int rc;
659
660
0
  if (src)
661
0
    {
662
0
      const char *encoding = locale_charset ();
663
664
0
      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
665
666
0
      if (!utf8src)
667
0
  {
668
0
    if (errno == ENOMEM)
669
0
      return IDN2_MALLOC;
670
0
    return IDN2_ICONV_FAIL;
671
0
  }
672
0
    }
673
674
0
  rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
675
0
           flags | IDN2_NFC_INPUT);
676
677
0
  free (utf8src);
678
679
0
  return rc;
680
0
}
681
682
/**
683
 * idn2_to_ascii_4i:
684
 * @input: zero terminated input Unicode (UCS-4) string.
685
 * @inlen: number of elements in @input.
686
 * @output: output zero terminated string that must have room for at
687
 *       least 63 characters plus the terminating zero.
688
 * @flags: optional #idn2_flags to modify behaviour.
689
 *
690
 * The ToASCII operation takes a sequence of Unicode code points that make
691
 * up one domain label and transforms it into a sequence of code points in
692
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
693
 * the resulting sequence are equivalent labels.
694
 *
695
 * It is important to note that the ToASCII operation can fail.
696
 * ToASCII fails if any step of it fails. If any step of the
697
 * ToASCII operation fails on any label in a domain name, that domain
698
 * name MUST NOT be used as an internationalized domain name.
699
 * The method for dealing with this failure is application-specific.
700
 *
701
 * The inputs to ToASCII are a sequence of code points.
702
 *
703
 * ToASCII never alters a sequence of code points that are all in the ASCII
704
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
705
 * times has exactly the same effect as applying it just once.
706
 *
707
 * The default behavior of this function (when flags are zero) is to apply
708
 * the IDNA2008 rules without the TR46 amendments. As the TR46
709
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
710
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
711
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
712
 *
713
 * Warning: With version 2.1.1 until before version 2.3.5 this
714
 * function was deprecated in favor of idn2_to_ascii_4i2().  We still
715
 * encourage you to use idn2_to_ascii_4i2() when appropriate.
716
 *
717
 * Returns: On successful conversion %IDN2_OK is returned; if the
718
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
719
 *   returned, or another error code is returned.
720
 *
721
 * Since: 2.0.0
722
 **/
723
int
724
idn2_to_ascii_4i (const uint32_t *input, size_t inlen, char *output,
725
      int flags)
726
0
{
727
0
  char *out;
728
0
  int rc;
729
730
0
  if (!input)
731
0
    {
732
0
      if (output)
733
0
  *output = 0;
734
0
      return IDN2_OK;
735
0
    }
736
737
0
  rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
738
0
  if (rc == IDN2_OK)
739
0
    {
740
0
      size_t len = strlen (out);
741
742
0
      if (len > IDN2_LABEL_MAX_LENGTH)
743
0
  rc = IDN2_TOO_BIG_LABEL;
744
0
      else if (output)
745
0
  strcpy (output, out);
746
747
0
      free (out);
748
0
    }
749
750
0
  return rc;
751
0
}
752
753
/**
754
 * idn2_to_ascii_4i2:
755
 * @input: zero terminated input Unicode (UCS-4) string.
756
 * @inlen: number of elements in @input.
757
 * @output: pointer to newly allocated zero-terminated output string.
758
 * @flags: optional #idn2_flags to modify behaviour.
759
 *
760
 * The ToASCII operation takes a sequence of Unicode code points that make
761
 * up one domain label and transforms it into a sequence of code points in
762
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
763
 * the resulting sequence are equivalent labels.
764
 *
765
 * It is important to note that the ToASCII operation can fail.
766
 * ToASCII fails if any step of it fails. If any step of the
767
 * ToASCII operation fails on any label in a domain name, that domain
768
 * name MUST NOT be used as an internationalized domain name.
769
 * The method for dealing with this failure is application-specific.
770
 *
771
 * The inputs to ToASCII are a sequence of code points.
772
 *
773
 * ToASCII never alters a sequence of code points that are all in the ASCII
774
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
775
 * times has exactly the same effect as applying it just once.
776
 *
777
 * The default behavior of this function (when flags are zero) is to apply
778
 * the IDNA2008 rules without the TR46 amendments. As the TR46
779
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
780
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
781
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
782
 *
783
 * Returns: On successful conversion %IDN2_OK is returned; if the
784
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
785
 *   returned, or another error code is returned.
786
 *
787
 * Since: 2.1.1
788
 **/
789
int
790
idn2_to_ascii_4i2 (const uint32_t *input, size_t inlen, char **output,
791
       int flags)
792
0
{
793
0
  uint32_t *input_u32;
794
0
  uint8_t *input_u8, *output_u8;
795
0
  size_t length;
796
0
  int rc;
797
798
0
  if (!input)
799
0
    {
800
0
      if (output)
801
0
  *output = NULL;
802
0
      return IDN2_OK;
803
0
    }
804
805
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
806
0
  if (!input_u32)
807
0
    return IDN2_MALLOC;
808
809
0
  u32_cpy (input_u32, input, inlen);
810
0
  input_u32[inlen] = 0;
811
812
0
  input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
813
0
  free (input_u32);
814
0
  if (!input_u8)
815
0
    {
816
0
      if (errno == ENOMEM)
817
0
  return IDN2_MALLOC;
818
0
      return IDN2_ENCODING_ERROR;
819
0
    }
820
821
0
  rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
822
0
  free (input_u8);
823
824
0
  if (rc == IDN2_OK)
825
0
    {
826
0
      if (output)
827
0
  *output = (char *) output_u8;
828
0
      else
829
0
  free (output_u8);
830
0
    }
831
832
0
  return rc;
833
0
}
834
835
/**
836
 * idn2_to_ascii_4z:
837
 * @input: zero terminated input Unicode (UCS-4) string.
838
 * @output: pointer to newly allocated zero-terminated output string.
839
 * @flags: optional #idn2_flags to modify behaviour.
840
 *
841
 * Convert UCS-4 domain name to ASCII string using the IDNA2008
842
 * rules.  The domain name may contain several labels, separated by dots.
843
 * The output buffer must be deallocated by the caller.
844
 *
845
 * The default behavior of this function (when flags are zero) is to apply
846
 * the IDNA2008 rules without the TR46 amendments. As the TR46
847
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
848
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
849
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
850
 *
851
 * Return value: Returns %IDN2_OK on success, or error code.
852
 *
853
 * Since: 2.0.0
854
 **/
855
int
856
idn2_to_ascii_4z (const uint32_t *input, char **output, int flags)
857
0
{
858
0
  uint8_t *input_u8;
859
0
  size_t length;
860
0
  int rc;
861
862
0
  if (!input)
863
0
    {
864
0
      if (output)
865
0
  *output = NULL;
866
0
      return IDN2_OK;
867
0
    }
868
869
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
870
0
  if (!input_u8)
871
0
    {
872
0
      if (errno == ENOMEM)
873
0
  return IDN2_MALLOC;
874
0
      return IDN2_ENCODING_ERROR;
875
0
    }
876
877
0
  rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
878
0
  free (input_u8);
879
880
0
  return rc;
881
0
}
882
883
/**
884
 * idn2_to_ascii_8z:
885
 * @input: zero terminated input UTF-8 string.
886
 * @output: pointer to newly allocated output string.
887
 * @flags: optional #idn2_flags to modify behaviour.
888
 *
889
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
890
 * rules.  The domain name may contain several labels, separated by dots.
891
 * The output buffer must be deallocated by the caller.
892
 *
893
 * The default behavior of this function (when flags are zero) is to apply
894
 * the IDNA2008 rules without the TR46 amendments. As the TR46
895
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
896
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
897
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
898
 *
899
 * Return value: Returns %IDN2_OK on success, or error code.
900
 *
901
 * Since: 2.0.0
902
 **/
903
int
904
idn2_to_ascii_8z (const char *input, char **output, int flags)
905
0
{
906
0
  return idn2_lookup_u8 ((const uint8_t *) input, (uint8_t **) output, flags);
907
0
}
908
909
/**
910
 * idn2_to_ascii_lz:
911
 * @input: zero terminated input string in the locale's encoding.
912
 * @output: pointer to newly allocated output string.
913
 * @flags: optional #idn2_flags to modify behaviour.
914
 *
915
 * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
916
 * rules.  The domain name may contain several labels, separated by dots.
917
 * The output buffer must be deallocated by the caller.
918
 *
919
 * The default behavior of this function (when flags are zero) is to apply
920
 * the IDNA2008 rules without the TR46 amendments. As the TR46
921
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
922
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
923
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
924
 *
925
 * Returns: %IDN2_OK on success, or error code.
926
 * Same as described in idn2_lookup_ul() documentation.
927
 *
928
 * Since: 2.0.0
929
 **/
930
int
931
idn2_to_ascii_lz (const char *input, char **output, int flags)
932
0
{
933
0
  return idn2_lookup_ul (input, output, flags);
934
0
}