Coverage Report

Created: 2026-01-13 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/lookup.c
Line
Count
Source
1
/* lookup.c - implementation of IDNA2008 lookup functions
2
   Copyright (C) 2011-2025 Simon Josefsson
3
   Copyright (C) 2017-2025 Tim Ruehsen
4
5
   Libidn2 is free software: you can redistribute it and/or modify it
6
   under the terms of either:
7
8
     * the GNU Lesser General Public License as published by the Free
9
       Software Foundation; either version 3 of the License, or (at
10
       your option) any later version.
11
12
   or
13
14
     * the GNU General Public License as published by the Free
15
       Software Foundation; either version 2 of the License, or (at
16
       your option) any later version.
17
18
   or both in parallel, as here.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License for more details.
24
25
   You should have received copies of the GNU General Public License and
26
   the GNU Lesser General Public License along with this program.  If
27
   not, see <http://www.gnu.org/licenses/>.
28
*/
29
30
#include <config.h>
31
32
#include "idn2.h"
33
34
#include <errno.h>    /* errno */
35
#include <stdlib.h>   /* malloc, free */
36
37
#include <unitypes.h>
38
#include <uniconv.h>    /* u8_strconv_from_locale */
39
#include <uninorm.h>    /* u32_normalize */
40
#include <unistr.h>   /* u8_to_u32 */
41
42
#include "idna.h"   /* _idn2_label_test */
43
#include "tr46map.h"    /* definition for tr46map.c */
44
45
#ifdef HAVE_LIBUNISTRING
46
/* copied from gnulib */
47
# include <limits.h>
48
/* Case labels for the 26 lower-case ASCII letters, each shifted by N. */
# define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
/* Case labels for the upper-case letters: the lower-case set shifted
   by the distance between 'A' and 'a'. */
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Locale-independent tolower: map an upper-case letter to its
   lower-case counterpart and return every other value unchanged. */
static inline int
c_tolower (int c)
{
  int lowered = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      lowered = c - 'A' + 'a';
      break;
    default:
      break;
    }

  return lowered;
}
68
69
/* Compare at most N bytes of S1 and S2, ignoring the case of the
   letters handled by c_tolower.  Returns zero when the compared
   prefixes are equal, otherwise a negative or positive value following
   the order of the first differing (lower-cased) bytes. */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* Identical pointers or an empty window compare equal. */
  if (a == b || n == 0)
    return 0;

  for (;;)
    {
      ca = c_tolower (*a);
      cb = c_tolower (*b);

      /* Stop once the window is exhausted, the first string ends, or
         the two bytes differ. */
      n--;
      if (n == 0 || ca == '\0' || ca != cb)
	break;

      a++;
      b++;
    }

  if (UCHAR_MAX <= INT_MAX)
    return ca - cb;

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int'.  */
  if (ca > cb)
    return 1;
  return ca < cb ? -1 : 0;
}
100
#else
101
# include <c-strcase.h>
102
#endif
103
104
static int
105
set_default_flags (int *flags)
106
25.5k
{
107
25.5k
  if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
108
0
    return IDN2_INVALID_FLAGS;
109
110
25.5k
  if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
111
16.8k
      && ((*flags) & IDN2_NO_TR46))
112
0
    return IDN2_INVALID_FLAGS;
113
114
25.5k
  if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
115
0
      && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
116
0
    return IDN2_INVALID_FLAGS;
117
118
25.5k
  if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
119
17.1k
    *flags |= IDN2_NONTRANSITIONAL;
120
121
25.5k
  return IDN2_OK;
122
25.5k
}
123
124
static int
125
label (const uint8_t *src, size_t srclen, uint8_t *dst, size_t *dstlen,
126
       int flags)
127
26.0k
{
128
26.0k
  size_t plen;
129
26.0k
  uint32_t *p = NULL;
130
26.0k
  const uint8_t *src_org = NULL;
131
26.0k
  uint8_t *src_allocated = NULL;
132
26.0k
  int rc, check_roundtrip = 0;
133
26.0k
  size_t tmpl, srclen_org = 0;
134
26.0k
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
135
26.0k
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;
136
137
26.0k
  if (_idn2_ascii_p (src, srclen))
138
11.7k
    {
139
11.7k
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
140
5.34k
    && memcmp (src, "xn--", 4) == 0)
141
4.07k
  {
142
    /*
143
       If the input to this procedure appears to be an A-label
144
       (i.e., it starts in "xn--", interpreted
145
       case-insensitively), the lookup application MAY attempt to
146
       convert it to a U-label, first ensuring that the A-label is
147
       entirely in lowercase (converting it to lowercase if
148
       necessary), and apply the tests of Section 5.4 and the
149
       conversion of Section 5.5 to that form. */
150
4.07k
    rc = idn2_punycode_decode ((char *) src + 4, srclen - 4,
151
4.07k
             label_u32, &label32_len);
152
4.07k
    if (rc)
153
0
      return rc;
154
155
4.07k
    check_roundtrip = 1;
156
4.07k
    src_org = src;
157
4.07k
    srclen_org = srclen;
158
159
4.07k
    srclen = IDN2_LABEL_MAX_LENGTH;
160
4.07k
    src = src_allocated =
161
4.07k
      u32_to_u8 (label_u32, label32_len, NULL, &srclen);
162
4.07k
    if (!src)
163
0
      {
164
0
        if (errno == ENOMEM)
165
0
    return IDN2_MALLOC;
166
0
        return IDN2_ENCODING_ERROR;
167
0
      }
168
4.07k
  }
169
7.65k
      else
170
7.65k
  {
171
7.65k
    if (srclen > IDN2_LABEL_MAX_LENGTH)
172
41
      return IDN2_TOO_BIG_LABEL;
173
7.61k
    if (srclen > *dstlen)
174
0
      return IDN2_TOO_BIG_DOMAIN;
175
176
7.61k
    memcpy (dst, src, srclen);
177
7.61k
    *dstlen = srclen;
178
7.61k
    return IDN2_OK;
179
7.61k
  }
180
11.7k
    }
181
182
18.4k
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
183
18.4k
  if (rc != IDN2_OK)
184
0
    goto out;
185
186
18.4k
  if (!(flags & IDN2_TRANSITIONAL))
187
12.4k
    {
188
12.4k
      rc = _idn2_label_test (TEST_NFC |
189
12.4k
           TEST_2HYPHEN |
190
12.4k
           TEST_LEADING_COMBINING |
191
12.4k
           TEST_DISALLOWED |
192
12.4k
           TEST_CONTEXTJ_RULE |
193
12.4k
           TEST_CONTEXTO_WITH_RULE |
194
12.4k
           TEST_UNASSIGNED | TEST_BIDI |
195
12.4k
           ((flags & IDN2_NONTRANSITIONAL) ?
196
12.4k
            TEST_NONTRANSITIONAL : 0) | ((flags &
197
12.4k
                  IDN2_USE_STD3_ASCII_RULES)
198
12.4k
                 ? 0 :
199
12.4k
                 TEST_ALLOW_STD3_DISALLOWED),
200
12.4k
           p, plen);
201
202
12.4k
      if (rc != IDN2_OK)
203
3.93k
  goto out;
204
12.4k
    }
205
206
14.5k
  dst[0] = 'x';
207
14.5k
  dst[1] = 'n';
208
14.5k
  dst[2] = '-';
209
14.5k
  dst[3] = '-';
210
211
14.5k
  tmpl = *dstlen - 4;
212
14.5k
  rc = idn2_punycode_encode (p, plen, (char *) dst + 4, &tmpl);
213
14.5k
  if (rc != IDN2_OK)
214
319
    goto out;
215
216
217
14.1k
  *dstlen = 4 + tmpl;
218
219
14.1k
  if (check_roundtrip)
220
3.31k
    {
221
3.31k
      if (srclen_org != *dstlen
222
3.31k
    || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
223
134
  {
224
134
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
225
134
    goto out;
226
134
  }
227
3.31k
    }
228
10.8k
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
229
10.8k
    {
230
10.8k
      rc = idn2_punycode_decode ((char *) dst + 4, *dstlen - 4,
231
10.8k
         label_u32, &label32_len);
232
10.8k
      if (rc)
233
1.13k
  {
234
1.13k
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
235
1.13k
    goto out;
236
1.13k
  }
237
238
9.73k
      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
239
0
  {
240
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
241
0
    goto out;
242
0
  }
243
9.73k
    }
244
245
12.9k
  rc = IDN2_OK;
246
247
18.4k
out:
248
18.4k
  free (p);
249
18.4k
  free (src_allocated);
250
18.4k
  return rc;
251
12.9k
}
252
253
#define TR46_TRANSITIONAL_CHECK \
254
7.89k
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
255
#define TR46_NONTRANSITIONAL_CHECK \
256
50.2k
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
257
258
static int
259
_tr46 (const uint8_t *domain_u8, uint8_t **out, int flags)
260
25.5k
{
261
25.5k
  size_t len, it;
262
25.5k
  uint32_t *domain_u32;
263
25.5k
  int err = IDN2_OK, rc;
264
25.5k
  int transitional = 0;
265
25.5k
  int test_flags;
266
267
25.5k
  if (flags & IDN2_TRANSITIONAL)
268
8.42k
    transitional = 1;
269
270
  /* convert UTF-8 to UTF-32 */
271
25.5k
  if (!(domain_u32 =
272
25.5k
  u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
273
1.71k
    {
274
1.71k
      if (errno == ENOMEM)
275
0
  return IDN2_MALLOC;
276
1.71k
      return IDN2_ENCODING_ERROR;
277
1.71k
    }
278
279
23.8k
  size_t len2 = 0;
280
344k
  for (it = 0; it < len - 1; it++)
281
321k
    {
282
321k
      IDNAMap map;
283
284
321k
      get_idna_map (domain_u32[it], &map);
285
286
321k
      if (map_is (&map, TR46_FLG_DISALLOWED))
287
770
  {
288
770
    if (domain_u32[it])
289
770
      {
290
770
        free (domain_u32);
291
770
        return IDN2_DISALLOWED;
292
770
      }
293
0
    len2++;
294
0
  }
295
320k
      else if (map_is (&map, TR46_FLG_MAPPED))
296
92.1k
  {
297
92.1k
    len2 += map.nmappings;
298
92.1k
  }
299
228k
      else if (map_is (&map, TR46_FLG_VALID))
300
142k
  {
301
142k
    len2++;
302
142k
  }
303
86.4k
      else if (map_is (&map, TR46_FLG_IGNORED))
304
510
  {
305
510
    continue;
306
510
  }
307
85.9k
      else if (map_is (&map, TR46_FLG_DEVIATION))
308
9.00k
  {
309
9.00k
    if (transitional)
310
2.83k
      {
311
2.83k
        len2 += map.nmappings;
312
2.83k
      }
313
6.17k
    else
314
6.17k
      len2++;
315
9.00k
  }
316
76.9k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
317
51.4k
  {
318
51.4k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
319
42.1k
      {
320
        /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
321
42.1k
        len2++;
322
42.1k
      }
323
9.35k
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
324
9.35k
      {
325
        /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
326
9.35k
        len2 += map.nmappings;
327
9.35k
      }
328
51.4k
  }
329
321k
    }
330
331
  /* Exit early if result is too long.
332
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
333
23.0k
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
334
205
    {
335
205
      free (domain_u32);
336
205
      return IDN2_TOO_BIG_DOMAIN;
337
205
    }
338
339
22.8k
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
340
22.8k
  if (!tmp)
341
0
    {
342
0
      free (domain_u32);
343
0
      return IDN2_MALLOC;
344
0
    }
345
346
22.8k
  len2 = 0;
347
275k
  for (it = 0; it < len - 1; it++)
348
252k
    {
349
252k
      uint32_t c = domain_u32[it];
350
252k
      IDNAMap map;
351
352
252k
      get_idna_map (c, &map);
353
354
252k
      if (map_is (&map, TR46_FLG_DISALLOWED))
355
0
  {
356
0
    tmp[len2++] = c;
357
0
  }
358
252k
      else if (map_is (&map, TR46_FLG_MAPPED))
359
81.4k
  {
360
81.4k
    len2 += get_map_data (tmp + len2, &map);
361
81.4k
  }
362
170k
      else if (map_is (&map, TR46_FLG_VALID))
363
119k
  {
364
119k
    tmp[len2++] = c;
365
119k
  }
366
51.2k
      else if (map_is (&map, TR46_FLG_IGNORED))
367
498
  {
368
498
    continue;
369
498
  }
370
50.7k
      else if (map_is (&map, TR46_FLG_DEVIATION))
371
8.91k
  {
372
8.91k
    if (transitional)
373
2.81k
      {
374
2.81k
        len2 += get_map_data (tmp + len2, &map);
375
2.81k
      }
376
6.10k
    else
377
6.10k
      tmp[len2++] = c;
378
8.91k
  }
379
41.8k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
380
21.1k
  {
381
21.1k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
382
17.0k
      {
383
17.0k
        tmp[len2++] = c;
384
17.0k
      }
385
4.09k
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
386
4.09k
      {
387
4.09k
        len2 += get_map_data (tmp + len2, &map);
388
4.09k
      }
389
21.1k
  }
390
252k
    }
391
22.8k
  free (domain_u32);
392
393
  /* Normalize to NFC */
394
22.8k
  tmp[len2] = 0;
395
22.8k
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
396
22.8k
  free (tmp);
397
22.8k
  tmp = NULL;
398
399
22.8k
  if (!domain_u32)
400
0
    {
401
0
      if (errno == ENOMEM)
402
0
  return IDN2_MALLOC;
403
0
      return IDN2_ENCODING_ERROR;
404
0
    }
405
406
  /* split into labels and check */
407
22.8k
  uint32_t *e, *s;
408
56.5k
  for (e = s = domain_u32; *e; s = e)
409
35.5k
    {
410
354k
      while (*e && *e != '.')
411
318k
  e++;
412
413
35.5k
      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
414
11.7k
    && s[3] == '-')
415
11.1k
  {
416
    /* decode punycode and check result non-transitional */
417
11.1k
    size_t ace_len;
418
11.1k
    uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
419
11.1k
    size_t name_len = IDN2_LABEL_MAX_LENGTH;
420
11.1k
    uint8_t *ace;
421
422
11.1k
    ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
423
11.1k
    if (!ace)
424
0
      {
425
0
        free (domain_u32);
426
0
        if (errno == ENOMEM)
427
0
    return IDN2_MALLOC;
428
0
        return IDN2_ENCODING_ERROR;
429
0
      }
430
431
11.1k
    rc = idn2_punycode_decode ((char *) ace, ace_len,
432
11.1k
             name_u32, &name_len);
433
434
11.1k
    free (ace);
435
436
11.1k
    if (rc)
437
1.86k
      {
438
1.86k
        free (domain_u32);
439
1.86k
        return rc;
440
1.86k
      }
441
442
9.23k
    test_flags = TR46_NONTRANSITIONAL_CHECK;
443
444
9.23k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
445
6.09k
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
446
447
9.23k
    if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
448
4.70k
      err = rc;
449
9.23k
  }
450
24.4k
      else
451
24.4k
  {
452
24.4k
    test_flags =
453
24.4k
      transitional ? TR46_TRANSITIONAL_CHECK :
454
24.4k
      TR46_NONTRANSITIONAL_CHECK;
455
456
24.4k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
457
16.6k
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
458
459
24.4k
    if ((rc = _idn2_label_test (test_flags, s, e - s)))
460
2.86k
      err = rc;
461
24.4k
  }
462
463
33.6k
      if (*e)
464
13.9k
  e++;
465
33.6k
    }
466
467
21.0k
  if (err == IDN2_OK && out)
468
16.0k
    {
469
16.0k
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
470
16.0k
      free (domain_u32);
471
472
16.0k
      if (!_out)
473
0
  {
474
0
    if (errno == ENOMEM)
475
0
      return IDN2_MALLOC;
476
0
    return IDN2_ENCODING_ERROR;
477
0
  }
478
479
16.0k
      *out = _out;
480
16.0k
    }
481
4.92k
  else
482
21.0k
    free (domain_u32);
483
484
21.0k
  return err;
485
21.0k
}
486
487
/**
488
 * idn2_lookup_u8:
489
 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
490
 * @lookupname: newly allocated output variable with name to lookup in DNS.
491
 * @flags: optional #idn2_flags to modify behaviour.
492
 *
493
 * Perform IDNA2008 lookup string conversion on domain name @src, as
494
 * described in section 5 of RFC 5891.  Note that the input string
495
 * must be encoded in UTF-8 and be in Unicode NFC form.
496
 *
497
 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
498
 * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
499
 * do already imply %IDN2_NFC_INPUT.
500
 *
501
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
502
 * convert any input A-labels to U-labels and perform additional
503
 * testing. This is default since version 2.2.
504
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
505
 *
506
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
507
 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
508
 * Unicode TR46 non-transitional processing.
509
 *
510
 * Multiple flags may be specified by binary or:ing them together.
511
 *
512
 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
513
 * Previously we were eliminating non-STD3 characters from domain strings
514
 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
515
 * functions. That was an unexpected regression for applications switching
516
 * from libidn and thus it is no longer applied by default.
517
 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
518
 *
519
 * After version 0.11: @lookupname may be NULL to test lookup of @src
520
 * without allocating memory.
521
 *
522
 * Returns: On successful conversion %IDN2_OK is returned, if the
523
 *   output domain or any label would have been too long
524
 *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
525
 *   another error code is returned.
526
 *
527
 * Since: 0.1
528
 **/
529
int
530
idn2_lookup_u8 (const uint8_t *src, uint8_t **lookupname, int flags)
531
25.5k
{
532
25.5k
  size_t lookupnamelen = 0;
533
25.5k
  uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
534
25.5k
  uint8_t *src_allocated = NULL;
535
25.5k
  int rc;
536
537
25.5k
  if (src == NULL)
538
0
    {
539
0
      if (lookupname)
540
0
  *lookupname = NULL;
541
0
      return IDN2_OK;
542
0
    }
543
544
25.5k
  rc = set_default_flags (&flags);
545
25.5k
  if (rc != IDN2_OK)
546
0
    return rc;
547
548
25.5k
  if (!(flags & IDN2_NO_TR46))
549
25.5k
    {
550
25.5k
      uint8_t *out = NULL;
551
552
25.5k
      rc = _tr46 (src, &out, flags);
553
25.5k
      if (rc != IDN2_OK)
554
9.48k
  return rc;
555
556
16.0k
      src = src_allocated = out;
557
16.0k
    }
558
559
16.0k
  do
560
26.0k
    {
561
26.0k
      const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
562
      /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
563
         and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
564
26.0k
      size_t labellen = end - src;
565
26.0k
      uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
566
26.0k
      size_t tmplen = IDN2_LABEL_MAX_LENGTH;
567
568
26.0k
      rc = label (src, labellen, tmp, &tmplen, flags);
569
26.0k
      if (rc != IDN2_OK)
570
5.56k
  {
571
5.56k
    free (src_allocated);
572
5.56k
    return rc;
573
5.56k
  }
574
575
20.5k
      if (lookupnamelen + tmplen
576
20.5k
    > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
577
79
  {
578
79
    free (src_allocated);
579
79
    return IDN2_TOO_BIG_DOMAIN;
580
79
  }
581
582
20.4k
      memcpy (_lookupname + lookupnamelen, tmp, tmplen);
583
20.4k
      lookupnamelen += tmplen;
584
585
20.4k
      if (*end == '.')
586
10.0k
  {
587
10.0k
    if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
588
0
      {
589
0
        free (src_allocated);
590
0
        return IDN2_TOO_BIG_DOMAIN;
591
0
      }
592
593
10.0k
    _lookupname[lookupnamelen] = '.';
594
10.0k
    lookupnamelen++;
595
10.0k
  }
596
20.4k
      _lookupname[lookupnamelen] = '\0';
597
598
20.4k
      src = end;
599
20.4k
    }
600
20.4k
  while (*src++);
601
602
16.0k
  free (src_allocated);
603
604
10.4k
  if (lookupname)
605
10.4k
    {
606
10.4k
      uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
607
608
10.4k
      if (tmp == NULL)
609
0
  return IDN2_MALLOC;
610
611
10.4k
      memcpy (tmp, _lookupname, lookupnamelen + 1);
612
10.4k
      *lookupname = tmp;
613
10.4k
    }
614
615
10.4k
  return IDN2_OK;
616
10.4k
}
617
618
/**
619
 * idn2_lookup_ul:
620
 * @src: input zero-terminated locale encoded string.
621
 * @lookupname: newly allocated output variable with name to lookup in DNS.
622
 * @flags: optional #idn2_flags to modify behaviour.
623
 *
624
 * Perform IDNA2008 lookup string conversion on domain name @src, as
625
 * described in section 5 of RFC 5891.  Note that the input is assumed
626
 * to be encoded in the locale's default coding system, and will be
627
 * transcoded to UTF-8 and NFC normalized by this function.
628
 *
629
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
630
 * convert any input A-labels to U-labels and perform additional
631
 * testing. This is default since version 2.2.
632
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
633
 *
634
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
635
 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
636
 * processing.
637
 *
638
 * Multiple flags may be specified by binary or:ing them together, for
639
 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
640
 *
641
 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
642
 *
643
 * After version 0.11: @lookupname may be NULL to test lookup of @src
644
 * without allocating memory.
645
 *
646
 * Returns: On successful conversion %IDN2_OK is returned, if
647
 *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
648
 *   returned, if the output domain or any label would have been too
649
 *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
650
 *   another error code is returned.
651
 *
652
 * Since: 0.1
653
 **/
654
int
655
idn2_lookup_ul (const char *src, char **lookupname, int flags)
656
16.5k
{
657
16.5k
  uint8_t *utf8src = NULL;
658
16.5k
  int rc;
659
660
16.5k
  if (src)
661
16.5k
    {
662
16.5k
      const char *encoding = locale_charset ();
663
664
16.5k
      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
665
666
16.5k
      if (!utf8src)
667
11.4k
  {
668
11.4k
    if (errno == ENOMEM)
669
0
      return IDN2_MALLOC;
670
11.4k
    return IDN2_ICONV_FAIL;
671
11.4k
  }
672
16.5k
    }
673
674
5.10k
  rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
675
5.10k
           flags | IDN2_NFC_INPUT);
676
677
5.10k
  free (utf8src);
678
679
5.10k
  return rc;
680
16.5k
}
681
682
/**
683
 * idn2_to_ascii_4i:
684
 * @input: zero terminated input Unicode (UCS-4) string.
685
 * @inlen: number of elements in @input.
686
 * @output: output zero terminated string that must have room for at
687
 *       least 63 characters plus the terminating zero.
688
 * @flags: optional #idn2_flags to modify behaviour.
689
 *
690
 * The ToASCII operation takes a sequence of Unicode code points that make
691
 * up one domain label and transforms it into a sequence of code points in
692
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
693
 * the resulting sequence are equivalent labels.
694
 *
695
 * It is important to note that the ToASCII operation can fail.
696
 * ToASCII fails if any step of it fails. If any step of the
697
 * ToASCII operation fails on any label in a domain name, that domain
698
 * name MUST NOT be used as an internationalized domain name.
699
 * The method for dealing with this failure is application-specific.
700
 *
701
 * The inputs to ToASCII are a sequence of code points.
702
 *
703
 * ToASCII never alters a sequence of code points that are all in the ASCII
704
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
705
 * times has exactly the same effect as applying it just once.
706
 *
707
 * The default behavior of this function (when flags are zero) is to apply
708
 * the IDNA2008 rules without the TR46 amendments. As the TR46
709
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
710
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
711
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
712
 *
713
 * Warning: With version 2.1.1 until before version 2.3.5 this
714
 * function was deprecated in favor of idn2_to_ascii_4i2().  We still
715
 * encourage you to use idn2_to_ascii_4i2() when appropriate.
716
 *
717
 * Returns: On successful conversion %IDN2_OK is returned; if the
718
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
719
 *   returned, or another error code is returned.
720
 *
721
 * Since: 2.0.0
722
 **/
723
int
724
idn2_to_ascii_4i (const uint32_t *input, size_t inlen, char *output,
725
      int flags)
726
978
{
727
978
  char *out;
728
978
  int rc;
729
730
978
  if (!input)
731
0
    {
732
0
      if (output)
733
0
  *output = 0;
734
0
      return IDN2_OK;
735
0
    }
736
737
978
  rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
738
978
  if (rc == IDN2_OK)
739
135
    {
740
135
      size_t len = strlen (out);
741
742
135
      if (len > IDN2_LABEL_MAX_LENGTH)
743
10
  rc = IDN2_TOO_BIG_LABEL;
744
125
      else if (output)
745
125
  strcpy (output, out);
746
747
135
      free (out);
748
135
    }
749
750
978
  return rc;
751
978
}
752
753
/**
754
 * idn2_to_ascii_4i2:
755
 * @input: zero terminated input Unicode (UCS-4) string.
756
 * @inlen: number of elements in @input.
757
 * @output: pointer to newly allocated zero-terminated output string.
758
 * @flags: optional #idn2_flags to modify behaviour.
759
 *
760
 * The ToASCII operation takes a sequence of Unicode code points that make
761
 * up one domain label and transforms it into a sequence of code points in
762
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
763
 * the resulting sequence are equivalent labels.
764
 *
765
 * It is important to note that the ToASCII operation can fail.
766
 * ToASCII fails if any step of it fails. If any step of the
767
 * ToASCII operation fails on any label in a domain name, that domain
768
 * name MUST NOT be used as an internationalized domain name.
769
 * The method for dealing with this failure is application-specific.
770
 *
771
 * The inputs to ToASCII are a sequence of code points.
772
 *
773
 * ToASCII never alters a sequence of code points that are all in the ASCII
774
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
775
 * times has exactly the same effect as applying it just once.
776
 *
777
 * The default behavior of this function (when flags are zero) is to apply
778
 * the IDNA2008 rules without the TR46 amendments. As the TR46
779
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
780
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
781
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
782
 *
783
 * Returns: On successful conversion %IDN2_OK is returned; if the
784
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
785
 *   returned, or another error code is returned.
786
 *
787
 * Since: 2.1.1
788
 **/
789
int
790
idn2_to_ascii_4i2 (const uint32_t *input, size_t inlen, char **output,
791
       int flags)
792
6.84k
{
793
6.84k
  uint32_t *input_u32;
794
6.84k
  uint8_t *input_u8, *output_u8;
795
6.84k
  size_t length;
796
6.84k
  int rc;
797
798
6.84k
  if (!input)
799
0
    {
800
0
      if (output)
801
0
  *output = NULL;
802
0
      return IDN2_OK;
803
0
    }
804
805
6.84k
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
806
6.84k
  if (!input_u32)
807
0
    return IDN2_MALLOC;
808
809
6.84k
  u32_cpy (input_u32, input, inlen);
810
6.84k
  input_u32[inlen] = 0;
811
812
6.84k
  input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
813
6.84k
  free (input_u32);
814
6.84k
  if (!input_u8)
815
4.76k
    {
816
4.76k
      if (errno == ENOMEM)
817
0
  return IDN2_MALLOC;
818
4.76k
      return IDN2_ENCODING_ERROR;
819
4.76k
    }
820
821
2.07k
  rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
822
2.07k
  free (input_u8);
823
824
2.07k
  if (rc == IDN2_OK)
825
1.13k
    {
826
1.13k
      if (output)
827
1.13k
  *output = (char *) output_u8;
828
0
      else
829
1.13k
  free (output_u8);
830
1.13k
    }
831
832
2.07k
  return rc;
833
6.84k
}
834
835
/**
836
 * idn2_to_ascii_4z:
837
 * @input: zero terminated input Unicode (UCS-4) string.
838
 * @output: pointer to newly allocated zero-terminated output string.
839
 * @flags: optional #idn2_flags to modify behaviour.
840
 *
841
 * Convert UCS-4 domain name to ASCII string using the IDNA2008
842
 * rules.  The domain name may contain several labels, separated by dots.
843
 * The output buffer must be deallocated by the caller.
844
 *
845
 * The default behavior of this function (when flags are zero) is to apply
846
 * the IDNA2008 rules without the TR46 amendments. As the TR46
847
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
848
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
849
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
850
 *
851
 * Return value: Returns %IDN2_OK on success, or error code.
852
 *
853
 * Since: 2.0.0
854
 **/
855
int
856
idn2_to_ascii_4z (const uint32_t *input, char **output, int flags)
857
5.86k
{
858
5.86k
  uint8_t *input_u8;
859
5.86k
  size_t length;
860
5.86k
  int rc;
861
862
5.86k
  if (!input)
863
0
    {
864
0
      if (output)
865
0
  *output = NULL;
866
0
      return IDN2_OK;
867
0
    }
868
869
5.86k
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
870
5.86k
  if (!input_u8)
871
4.06k
    {
872
4.06k
      if (errno == ENOMEM)
873
0
  return IDN2_MALLOC;
874
4.06k
      return IDN2_ENCODING_ERROR;
875
4.06k
    }
876
877
1.80k
  rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
878
1.80k
  free (input_u8);
879
880
1.80k
  return rc;
881
5.86k
}
882
883
/**
884
 * idn2_to_ascii_8z:
885
 * @input: zero terminated input UTF-8 string.
886
 * @output: pointer to newly allocated output string.
887
 * @flags: optional #idn2_flags to modify behaviour.
888
 *
889
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
890
 * rules.  The domain name may contain several labels, separated by dots.
891
 * The output buffer must be deallocated by the caller.
892
 *
893
 * The default behavior of this function (when flags are zero) is to apply
894
 * the IDNA2008 rules without the TR46 amendments. As the TR46
895
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
896
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
897
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
898
 *
899
 * Return value: Returns %IDN2_OK on success, or error code.
900
 *
901
 * Since: 2.0.0
902
 **/
903
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  /* Same operation as idn2_lookup_u8; only the pointer types differ. */
  const uint8_t *src = (const uint8_t *) input;
  uint8_t **lookupname = (uint8_t **) output;

  return idn2_lookup_u8 (src, lookupname, flags);
}
908
909
/**
910
 * idn2_to_ascii_lz:
911
 * @input: zero terminated input locale encoded string.
912
 * @output: pointer to newly allocated output string.
913
 * @flags: optional #idn2_flags to modify behaviour.
914
 *
915
 * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
916
 * rules.  The domain name may contain several labels, separated by dots.
917
 * The output buffer must be deallocated by the caller.
918
 *
919
 * The default behavior of this function (when flags are zero) is to apply
920
 * the IDNA2008 rules without the TR46 amendments. As the TR46
921
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
922
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
923
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
924
 *
925
 * Returns: %IDN2_OK on success, or error code.
926
 * Same as described in idn2_lookup_ul() documentation.
927
 *
928
 * Since: 2.0.0
929
 **/
930
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  /* Plain forwarder to idn2_lookup_ul. */
  int rc = idn2_lookup_ul (input, output, flags);

  return rc;
}