Coverage Report

Created: 2023-06-07 07:18

/src/libidn2/lib/lookup.c
Line
Count
Source (jump to first uncovered line)
1
/* lookup.c - implementation of IDNA2008 lookup functions
2
   Copyright (C) 2011-2022 Simon Josefsson
3
   Copyright (C) 2017-2022 Tim Ruehsen
4
5
   Libidn2 is free software: you can redistribute it and/or modify it
6
   under the terms of either:
7
8
     * the GNU Lesser General Public License as published by the Free
9
       Software Foundation; either version 3 of the License, or (at
10
       your option) any later version.
11
12
   or
13
14
     * the GNU General Public License as published by the Free
15
       Software Foundation; either version 2 of the License, or (at
16
       your option) any later version.
17
18
   or both in parallel, as here.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License for more details.
24
25
   You should have received copies of the GNU General Public License and
26
   the GNU Lesser General Public License along with this program.  If
27
   not, see <http://www.gnu.org/licenses/>.
28
*/
29
30
#include <config.h>
31
32
#include "idn2.h"
33
34
#include <errno.h>    /* errno */
35
#include <stdlib.h>   /* malloc, free */
36
37
#include "punycode.h"
38
39
#include <unitypes.h>
40
#include <uniconv.h>    /* u8_strconv_from_locale */
41
#include <uninorm.h>    /* u32_normalize */
42
#include <unistr.h>   /* u8_to_u32 */
43
44
#include "idna.h"   /* _idn2_label_test */
45
#include "tr46map.h"    /* definition for tr46map.c */
46
47
#ifdef HAVE_LIBUNISTRING
48
/* copied from gnulib */
49
# include <limits.h>
50
/* Expands to the case labels for the letters 'a'..'z', each shifted by N.
   The final label carries no colon, so the macro is written as
   "_C_CTYPE_LOWER_N (n):" inside a switch.  (Copied from gnulib.)  */
# define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): case 'g' + (N): case 'h' + (N): \
   case 'i' + (N): case 'j' + (N): case 'k' + (N): case 'l' + (N): \
   case 'm' + (N): case 'n' + (N): case 'o' + (N): case 'p' + (N): \
   case 'q' + (N): case 'r' + (N): case 's' + (N): case 't' + (N): \
   case 'u' + (N): case 'v' + (N): case 'w' + (N): case 'x' + (N): \
   case 'y' + (N): case 'z' + (N)

/* Case labels for 'A'..'Z': the lowercase labels shifted by 'A' - 'a'.  */
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Return the lowercase counterpart of C when C is one of 'A'..'Z';
   any other value is returned unchanged.  Independent of the locale.  */
static inline int
c_tolower (int c)
{
  int folded = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      folded = c - 'A' + 'a';
      break;

    default:
      break;
    }

  return folded;
}
70
71
/* Compare at most N bytes of S1 and S2, folding 'A'..'Z' to lowercase with
   c_tolower() first.  Scanning stops at the first differing byte, at a NUL
   byte in S1, or once N bytes have been examined.

   Returns 0 when the examined bytes are equal (also when N is 0 or both
   pointers are the same); otherwise a negative or positive value depending
   on how the first differing folded bytes compare.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *) s1;
  const unsigned char *rhs = (const unsigned char *) s2;
  unsigned char a, b;

  /* Nothing to compare, or comparing a string with itself.  */
  if (n == 0 || lhs == rhs)
    return 0;

  for (;;)
    {
      a = c_tolower (*lhs);
      b = c_tolower (*rhs);

      n--;
      if (n == 0 || a == '\0' || a != b)
        break;

      lhs++;
      rhs++;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so fall back to an explicit three-way
     comparison there.  */
  if (UCHAR_MAX > INT_MAX)
    return (a > b) - (a < b);

  return a - b;
}
102
#else
103
# include <c-strcase.h>
104
#endif
105
106
static int
107
set_default_flags (int *flags)
108
7.47M
{
109
7.47M
  if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
110
0
    return IDN2_INVALID_FLAGS;
111
112
7.47M
  if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
113
7.47M
      && ((*flags) & IDN2_NO_TR46))
114
0
    return IDN2_INVALID_FLAGS;
115
116
7.47M
  if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
117
7.47M
      && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
118
0
    return IDN2_INVALID_FLAGS;
119
120
7.47M
  if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
121
7.47M
    *flags |= IDN2_NONTRANSITIONAL;
122
123
7.47M
  return IDN2_OK;
124
7.47M
}
125
126
static int
127
label (const uint8_t * src, size_t srclen, uint8_t * dst, size_t *dstlen,
128
       int flags)
129
7.39M
{
130
7.39M
  size_t plen;
131
7.39M
  uint32_t *p = NULL;
132
7.39M
  const uint8_t *src_org = NULL;
133
7.39M
  uint8_t *src_allocated = NULL;
134
7.39M
  int rc, check_roundtrip = 0;
135
7.39M
  size_t tmpl, srclen_org = 0;
136
7.39M
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
137
7.39M
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;
138
139
7.39M
  if (_idn2_ascii_p (src, srclen))
140
3.51M
    {
141
3.51M
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
142
3.51M
    && memcmp (src, "xn--", 4) == 0)
143
1.03k
  {
144
    /*
145
       If the input to this procedure appears to be an A-label
146
       (i.e., it starts in "xn--", interpreted
147
       case-insensitively), the lookup application MAY attempt to
148
       convert it to a U-label, first ensuring that the A-label is
149
       entirely in lowercase (converting it to lowercase if
150
       necessary), and apply the tests of Section 5.4 and the
151
       conversion of Section 5.5 to that form. */
152
1.03k
    rc =
153
1.03k
      _idn2_punycode_decode_internal (srclen - 4, (char *) src + 4,
154
1.03k
              &label32_len, label_u32);
155
1.03k
    if (rc)
156
0
      return rc;
157
158
1.03k
    check_roundtrip = 1;
159
1.03k
    src_org = src;
160
1.03k
    srclen_org = srclen;
161
162
1.03k
    srclen = IDN2_LABEL_MAX_LENGTH;
163
1.03k
    src = src_allocated =
164
1.03k
      u32_to_u8 (label_u32, label32_len, NULL, &srclen);
165
1.03k
    if (!src)
166
0
      {
167
0
        if (errno == ENOMEM)
168
0
    return IDN2_MALLOC;
169
0
        return IDN2_ENCODING_ERROR;
170
0
      }
171
1.03k
  }
172
3.51M
      else
173
3.51M
  {
174
3.51M
    if (srclen > IDN2_LABEL_MAX_LENGTH)
175
321
      return IDN2_TOO_BIG_LABEL;
176
3.51M
    if (srclen > *dstlen)
177
0
      return IDN2_TOO_BIG_DOMAIN;
178
179
3.51M
    memcpy (dst, src, srclen);
180
3.51M
    *dstlen = srclen;
181
3.51M
    return IDN2_OK;
182
3.51M
  }
183
3.51M
    }
184
185
3.88M
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
186
3.88M
  if (rc != IDN2_OK)
187
0
    goto out;
188
189
3.88M
  if (!(flags & IDN2_TRANSITIONAL))
190
3.88M
    {
191
3.88M
      rc = _idn2_label_test (TEST_NFC |
192
3.88M
           TEST_2HYPHEN |
193
3.88M
           TEST_LEADING_COMBINING |
194
3.88M
           TEST_DISALLOWED |
195
3.88M
           TEST_CONTEXTJ_RULE |
196
3.88M
           TEST_CONTEXTO_WITH_RULE |
197
3.88M
           TEST_UNASSIGNED | TEST_BIDI |
198
3.88M
           ((flags & IDN2_NONTRANSITIONAL) ?
199
3.88M
            TEST_NONTRANSITIONAL : 0) | ((flags &
200
3.88M
                  IDN2_USE_STD3_ASCII_RULES)
201
3.88M
                 ? 0 :
202
3.88M
                 TEST_ALLOW_STD3_DISALLOWED),
203
3.88M
           p, plen);
204
205
3.88M
      if (rc != IDN2_OK)
206
16.1k
  goto out;
207
3.88M
    }
208
209
3.87M
  dst[0] = 'x';
210
3.87M
  dst[1] = 'n';
211
3.87M
  dst[2] = '-';
212
3.87M
  dst[3] = '-';
213
214
3.87M
  tmpl = *dstlen - 4;
215
3.87M
  rc = _idn2_punycode_encode_internal (plen, p, &tmpl, (char *) dst + 4);
216
3.87M
  if (rc != IDN2_OK)
217
958
    goto out;
218
219
220
3.86M
  *dstlen = 4 + tmpl;
221
222
3.86M
  if (check_roundtrip)
223
929
    {
224
929
      if (srclen_org != *dstlen
225
929
    || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
226
538
  {
227
538
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
228
538
    goto out;
229
538
  }
230
929
    }
231
3.86M
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
232
3.86M
    {
233
3.86M
      rc =
234
3.86M
  _idn2_punycode_decode_internal (*dstlen - 4, (char *) dst + 4,
235
3.86M
          &label32_len, label_u32);
236
3.86M
      if (rc)
237
6.50k
  {
238
6.50k
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
239
6.50k
    goto out;
240
6.50k
  }
241
242
3.86M
      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
243
0
  {
244
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
245
0
    goto out;
246
0
  }
247
3.86M
    }
248
249
3.86M
  rc = IDN2_OK;
250
251
3.88M
out:
252
3.88M
  free (p);
253
3.88M
  free (src_allocated);
254
3.88M
  return rc;
255
3.86M
}
256
257
#define TR46_TRANSITIONAL_CHECK \
258
0
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
259
#define TR46_NONTRANSITIONAL_CHECK \
260
8.43M
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
261
262
static int
263
_tr46 (const uint8_t * domain_u8, uint8_t ** out, int flags)
264
7.47M
{
265
7.47M
  size_t len, it;
266
7.47M
  uint32_t *domain_u32;
267
7.47M
  int err = IDN2_OK, rc;
268
7.47M
  int transitional = 0;
269
7.47M
  int test_flags;
270
271
7.47M
  if (flags & IDN2_TRANSITIONAL)
272
0
    transitional = 1;
273
274
  /* convert UTF-8 to UTF-32 */
275
7.47M
  if (!(domain_u32 =
276
7.47M
  u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
277
0
    {
278
0
      if (errno == ENOMEM)
279
0
  return IDN2_MALLOC;
280
0
      return IDN2_ENCODING_ERROR;
281
0
    }
282
283
7.47M
  size_t len2 = 0;
284
15.5M
  for (it = 0; it < len - 1; it++)
285
8.51M
    {
286
8.51M
      IDNAMap map;
287
288
8.51M
      get_idna_map (domain_u32[it], &map);
289
290
8.51M
      if (map_is (&map, TR46_FLG_DISALLOWED))
291
420k
  {
292
420k
    if (domain_u32[it])
293
420k
      {
294
420k
        free (domain_u32);
295
420k
        return IDN2_DISALLOWED;
296
420k
      }
297
0
    len2++;
298
0
  }
299
8.09M
      else if (map_is (&map, TR46_FLG_MAPPED))
300
3.04k
  {
301
3.04k
    len2 += map.nmappings;
302
3.04k
  }
303
8.09M
      else if (map_is (&map, TR46_FLG_VALID))
304
5.03M
  {
305
5.03M
    len2++;
306
5.03M
  }
307
3.05M
      else if (map_is (&map, TR46_FLG_IGNORED))
308
2.99M
  {
309
2.99M
    continue;
310
2.99M
  }
311
61.7k
      else if (map_is (&map, TR46_FLG_DEVIATION))
312
3.56k
  {
313
3.56k
    if (transitional)
314
0
      {
315
0
        len2 += map.nmappings;
316
0
      }
317
3.56k
    else
318
3.56k
      len2++;
319
3.56k
  }
320
58.2k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
321
58.2k
  {
322
58.2k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
323
58.2k
      {
324
        /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
325
58.2k
        len2++;
326
58.2k
      }
327
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
328
0
      {
329
        /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
330
0
        len2 += map.nmappings;
331
0
      }
332
58.2k
  }
333
8.51M
    }
334
335
  /* Exit early if result is too long.
336
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
337
7.05M
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
338
202
    {
339
202
      free (domain_u32);
340
202
      return IDN2_TOO_BIG_DOMAIN;
341
202
    }
342
343
7.05M
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
344
7.05M
  if (!tmp)
345
0
    {
346
0
      free (domain_u32);
347
0
      return IDN2_MALLOC;
348
0
    }
349
350
7.05M
  len2 = 0;
351
15.0M
  for (it = 0; it < len - 1; it++)
352
7.99M
    {
353
7.99M
      uint32_t c = domain_u32[it];
354
7.99M
      IDNAMap map;
355
356
7.99M
      get_idna_map (c, &map);
357
358
7.99M
      if (map_is (&map, TR46_FLG_DISALLOWED))
359
0
  {
360
0
    tmp[len2++] = c;
361
0
  }
362
7.99M
      else if (map_is (&map, TR46_FLG_MAPPED))
363
3.03k
  {
364
3.03k
    len2 += get_map_data (tmp + len2, &map);
365
3.03k
  }
366
7.99M
      else if (map_is (&map, TR46_FLG_VALID))
367
4.95M
  {
368
4.95M
    tmp[len2++] = c;
369
4.95M
  }
370
3.03M
      else if (map_is (&map, TR46_FLG_IGNORED))
371
2.99M
  {
372
2.99M
    continue;
373
2.99M
  }
374
48.4k
      else if (map_is (&map, TR46_FLG_DEVIATION))
375
3.42k
  {
376
3.42k
    if (transitional)
377
0
      {
378
0
        len2 += get_map_data (tmp + len2, &map);
379
0
      }
380
3.42k
    else
381
3.42k
      tmp[len2++] = c;
382
3.42k
  }
383
45.0k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
384
45.0k
  {
385
45.0k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
386
45.0k
      {
387
45.0k
        tmp[len2++] = c;
388
45.0k
      }
389
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
390
0
      {
391
0
        len2 += get_map_data (tmp + len2, &map);
392
0
      }
393
45.0k
  }
394
7.99M
    }
395
7.05M
  free (domain_u32);
396
397
  /* Normalize to NFC */
398
7.05M
  tmp[len2] = 0;
399
7.05M
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
400
7.05M
  free (tmp);
401
7.05M
  tmp = NULL;
402
403
7.05M
  if (!domain_u32)
404
0
    {
405
0
      if (errno == ENOMEM)
406
0
  return IDN2_MALLOC;
407
0
      return IDN2_ENCODING_ERROR;
408
0
    }
409
410
  /* split into labels and check */
411
7.05M
  uint32_t *e, *s;
412
11.2M
  for (e = s = domain_u32; *e; s = e)
413
4.22M
    {
414
8.86M
      while (*e && *e != '.')
415
4.64M
  e++;
416
417
4.22M
      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
418
4.22M
    && s[3] == '-')
419
4.41k
  {
420
    /* decode punycode and check result non-transitional */
421
4.41k
    size_t ace_len;
422
4.41k
    uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
423
4.41k
    size_t name_len = IDN2_LABEL_MAX_LENGTH;
424
4.41k
    uint8_t *ace;
425
426
4.41k
    ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
427
4.41k
    if (!ace)
428
0
      {
429
0
        free (domain_u32);
430
0
        if (errno == ENOMEM)
431
0
    return IDN2_MALLOC;
432
0
        return IDN2_ENCODING_ERROR;
433
0
      }
434
435
4.41k
    rc = _idn2_punycode_decode_internal (ace_len, (char *) ace,
436
4.41k
                 &name_len, name_u32);
437
438
4.41k
    free (ace);
439
440
4.41k
    if (rc)
441
2.33k
      {
442
2.33k
        free (domain_u32);
443
2.33k
        return rc;
444
2.33k
      }
445
446
2.07k
    test_flags = TR46_NONTRANSITIONAL_CHECK;
447
448
2.07k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
449
2.07k
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
450
451
2.07k
    if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
452
1.04k
      err = rc;
453
2.07k
  }
454
4.21M
      else
455
4.21M
  {
456
4.21M
    test_flags =
457
4.21M
      transitional ? TR46_TRANSITIONAL_CHECK :
458
4.21M
      TR46_NONTRANSITIONAL_CHECK;
459
460
4.21M
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
461
4.21M
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
462
463
4.21M
    if ((rc = _idn2_label_test (test_flags, s, e - s)))
464
3.99k
      err = rc;
465
4.21M
  }
466
467
4.21M
      if (*e)
468
360k
  e++;
469
4.21M
    }
470
471
7.04M
  if (err == IDN2_OK && out)
472
7.04M
    {
473
7.04M
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
474
7.04M
      free (domain_u32);
475
476
7.04M
      if (!_out)
477
0
  {
478
0
    if (errno == ENOMEM)
479
0
      return IDN2_MALLOC;
480
0
    return IDN2_ENCODING_ERROR;
481
0
  }
482
483
7.04M
      *out = _out;
484
7.04M
    }
485
3.93k
  else
486
3.93k
    free (domain_u32);
487
488
7.04M
  return err;
489
7.04M
}
490
491
/**
492
 * idn2_lookup_u8:
493
 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
494
 * @lookupname: newly allocated output variable with name to lookup in DNS.
495
 * @flags: optional #idn2_flags to modify behaviour.
496
 *
497
 * Perform IDNA2008 lookup string conversion on domain name @src, as
498
 * described in section 5 of RFC 5891.  Note that the input string
499
 * must be encoded in UTF-8 and be in Unicode NFC form.
500
 *
501
 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
502
 * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
503
 * do already imply %IDN2_NFC_INPUT.
504
 *
505
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
506
 * convert any input A-labels to U-labels and perform additional
507
 * testing. This is default since version 2.2.
508
 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
509
 *
510
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
511
 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
512
 * Unicode TR46 non-transitional processing.
513
 *
514
 * Multiple flags may be specified by binary or:ing them together.
515
 *
516
 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
517
 * Previously we were eliminating non-STD3 characters from domain strings
518
 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
519
 * functions. That was an unexpected regression for applications switching
520
 * from libidn and thus it is no longer applied by default.
521
 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
522
 *
523
 * After version 0.11: @lookupname may be NULL to test lookup of @src
524
 * without allocating memory.
525
 *
526
 * Returns: On successful conversion %IDN2_OK is returned, if the
527
 *   output domain or any label would have been too long
528
 *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
529
 *   another error code is returned.
530
 *
531
 * Since: 0.1
532
 **/
533
int
534
idn2_lookup_u8 (const uint8_t * src, uint8_t ** lookupname, int flags)
535
7.47M
{
536
7.47M
  size_t lookupnamelen = 0;
537
7.47M
  uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
538
7.47M
  uint8_t *src_allocated = NULL;
539
7.47M
  int rc;
540
541
7.47M
  if (src == NULL)
542
0
    {
543
0
      if (lookupname)
544
0
  *lookupname = NULL;
545
0
      return IDN2_OK;
546
0
    }
547
548
7.47M
  rc = set_default_flags (&flags);
549
7.47M
  if (rc != IDN2_OK)
550
0
    return rc;
551
552
7.47M
  if (!(flags & IDN2_NO_TR46))
553
7.47M
    {
554
7.47M
      uint8_t *out;
555
556
7.47M
      rc = _tr46 (src, &out, flags);
557
7.47M
      if (rc != IDN2_OK)
558
427k
  return rc;
559
560
7.04M
      src = src_allocated = out;
561
7.04M
    }
562
563
7.04M
  do
564
7.39M
    {
565
7.39M
      const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
566
      /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
567
         and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
568
7.39M
      size_t labellen = end - src;
569
7.39M
      uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
570
7.39M
      size_t tmplen = IDN2_LABEL_MAX_LENGTH;
571
572
7.39M
      rc = label (src, labellen, tmp, &tmplen, flags);
573
7.39M
      if (rc != IDN2_OK)
574
24.4k
  {
575
24.4k
    free (src_allocated);
576
24.4k
    return rc;
577
24.4k
  }
578
579
7.37M
      if (lookupnamelen + tmplen
580
7.37M
    > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
581
202
  {
582
202
    free (src_allocated);
583
202
    return IDN2_TOO_BIG_DOMAIN;
584
202
  }
585
586
7.37M
      memcpy (_lookupname + lookupnamelen, tmp, tmplen);
587
7.37M
      lookupnamelen += tmplen;
588
589
7.37M
      if (*end == '.')
590
356k
  {
591
356k
    if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
592
0
      {
593
0
        free (src_allocated);
594
0
        return IDN2_TOO_BIG_DOMAIN;
595
0
      }
596
597
356k
    _lookupname[lookupnamelen] = '.';
598
356k
    lookupnamelen++;
599
356k
  }
600
7.37M
      _lookupname[lookupnamelen] = '\0';
601
602
7.37M
      src = end;
603
7.37M
    }
604
7.37M
  while (*src++);
605
606
7.01M
  free (src_allocated);
607
608
7.01M
  if (lookupname)
609
7.01M
    {
610
7.01M
      uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
611
612
7.01M
      if (tmp == NULL)
613
0
  return IDN2_MALLOC;
614
615
7.01M
      memcpy (tmp, _lookupname, lookupnamelen + 1);
616
7.01M
      *lookupname = tmp;
617
7.01M
    }
618
619
7.01M
  return IDN2_OK;
620
7.01M
}
621
622
/**
623
 * idn2_lookup_ul:
624
 * @src: input zero-terminated locale encoded string.
625
 * @lookupname: newly allocated output variable with name to lookup in DNS.
626
 * @flags: optional #idn2_flags to modify behaviour.
627
 *
628
 * Perform IDNA2008 lookup string conversion on domain name @src, as
629
 * described in section 5 of RFC 5891.  Note that the input is assumed
630
 * to be encoded in the locale's default coding system, and will be
631
 * transcoded to UTF-8 and NFC normalized by this function.
632
 *
633
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
634
 * convert any input A-labels to U-labels and perform additional
635
 * testing. This is default since version 2.2.
636
 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
637
 *
638
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
639
 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
640
 * processing.
641
 *
642
 * Multiple flags may be specified by binary or:ing them together, for
643
 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
644
 *
645
 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
646
 *
647
 * After version 0.11: @lookupname may be NULL to test lookup of @src
648
 * without allocating memory.
649
 *
650
 * Returns: On successful conversion %IDN2_OK is returned, if
651
 *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
652
 *   returned, if the output domain or any label would have been too
653
 *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
654
 *   another error code is returned.
655
 *
656
 * Since: 0.1
657
 **/
658
int
659
idn2_lookup_ul (const char *src, char **lookupname, int flags)
660
0
{
661
0
  uint8_t *utf8src = NULL;
662
0
  int rc;
663
664
0
  if (src)
665
0
    {
666
0
      const char *encoding = locale_charset ();
667
668
0
      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
669
670
0
      if (!utf8src)
671
0
  {
672
0
    if (errno == ENOMEM)
673
0
      return IDN2_MALLOC;
674
0
    return IDN2_ICONV_FAIL;
675
0
  }
676
0
    }
677
678
0
  rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
679
0
           flags | IDN2_NFC_INPUT);
680
681
0
  free (utf8src);
682
683
0
  return rc;
684
0
}
685
686
/**
687
 * idn2_to_ascii_4i:
688
 * @input: zero terminated input Unicode (UCS-4) string.
689
 * @inlen: number of elements in @input.
690
 * @output: output zero terminated string that must have room for at least 63 characters plus the terminating zero.
691
 * @flags: optional #idn2_flags to modify behaviour.
692
 *
693
 * THIS FUNCTION HAS BEEN DEPRECATED DUE TO A DESIGN FLAW. USE idn2_to_ascii_4i2() INSTEAD !
694
 *
695
 * The ToASCII operation takes a sequence of Unicode code points that make
696
 * up one domain label and transforms it into a sequence of code points in
697
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
698
 * the resulting sequence are equivalent labels.
699
 *
700
 * It is important to note that the ToASCII operation can fail.
701
 * ToASCII fails if any step of it fails. If any step of the
702
 * ToASCII operation fails on any label in a domain name, that domain
703
 * name MUST NOT be used as an internationalized domain name.
704
 * The method for dealing with this failure is application-specific.
705
 *
706
 * The inputs to ToASCII are a sequence of code points.
707
 *
708
 * ToASCII never alters a sequence of code points that are all in the ASCII
709
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
710
 * effect as applying it just once.
711
 *
712
 * The default behavior of this function (when flags are zero) is to apply
713
 * the IDNA2008 rules without the TR46 amendments. As the TR46
714
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
715
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
716
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
717
 *
718
 * Return value: Returns %IDN2_OK on success, or error code.
719
 *
720
 * Since: 2.0.0
721
 *
722
 * Deprecated: 2.1.1: Use idn2_to_ascii_4i2().
723
 **/
724
int
725
idn2_to_ascii_4i (const uint32_t * input, size_t inlen, char *output,
726
      int flags)
727
0
{
728
0
  char *out;
729
0
  int rc;
730
731
0
  if (!input)
732
0
    {
733
0
      if (output)
734
0
  *output = 0;
735
0
      return IDN2_OK;
736
0
    }
737
738
0
  rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
739
0
  if (rc == IDN2_OK)
740
0
    {
741
0
      size_t len = strlen (out);
742
743
0
      if (len > 63)
744
0
  rc = IDN2_TOO_BIG_DOMAIN;
745
0
      else if (output)
746
0
  memcpy (output, out, len);
747
748
0
      free (out);
749
0
    }
750
751
0
  return rc;
752
0
}
753
754
/**
755
 * idn2_to_ascii_4i2:
756
 * @input: zero terminated input Unicode (UCS-4) string.
757
 * @inlen: number of elements in @input.
758
 * @output: pointer to newly allocated zero-terminated output string.
759
 * @flags: optional #idn2_flags to modify behaviour.
760
 *
761
 * The ToASCII operation takes a sequence of Unicode code points that make
762
 * up one domain label and transforms it into a sequence of code points in
763
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
764
 * the resulting sequence are equivalent labels.
765
 *
766
 * It is important to note that the ToASCII operation can fail.
767
 * ToASCII fails if any step of it fails. If any step of the
768
 * ToASCII operation fails on any label in a domain name, that domain
769
 * name MUST NOT be used as an internationalized domain name.
770
 * The method for dealing with this failure is application-specific.
771
 *
772
 * The inputs to ToASCII are a sequence of code points.
773
 *
774
 * ToASCII never alters a sequence of code points that are all in the ASCII
775
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
776
 * effect as applying it just once.
777
 *
778
 * The default behavior of this function (when flags are zero) is to apply
779
 * the IDNA2008 rules without the TR46 amendments. As the TR46
780
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
781
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
782
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
783
 *
784
 * Return value: Returns %IDN2_OK on success, or error code.
785
 *
786
 * Since: 2.1.1
787
 **/
788
int
789
idn2_to_ascii_4i2 (const uint32_t * input, size_t inlen, char **output,
790
       int flags)
791
0
{
792
0
  uint32_t *input_u32;
793
0
  uint8_t *input_u8, *output_u8;
794
0
  size_t length;
795
0
  int rc;
796
797
0
  if (!input)
798
0
    {
799
0
      if (output)
800
0
  *output = NULL;
801
0
      return IDN2_OK;
802
0
    }
803
804
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
805
0
  if (!input_u32)
806
0
    return IDN2_MALLOC;
807
808
0
  u32_cpy (input_u32, input, inlen);
809
0
  input_u32[inlen] = 0;
810
811
0
  input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
812
0
  free (input_u32);
813
0
  if (!input_u8)
814
0
    {
815
0
      if (errno == ENOMEM)
816
0
  return IDN2_MALLOC;
817
0
      return IDN2_ENCODING_ERROR;
818
0
    }
819
820
0
  rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
821
0
  free (input_u8);
822
823
0
  if (rc == IDN2_OK)
824
0
    {
825
0
      if (output)
826
0
  *output = (char *) output_u8;
827
0
      else
828
0
  free (output_u8);
829
0
    }
830
831
0
  return rc;
832
0
}
833
834
/**
835
 * idn2_to_ascii_4z:
836
 * @input: zero terminated input Unicode (UCS-4) string.
837
 * @output: pointer to newly allocated zero-terminated output string.
838
 * @flags: optional #idn2_flags to modify behaviour.
839
 *
840
 * Convert UCS-4 domain name to ASCII string using the IDNA2008
841
 * rules.  The domain name may contain several labels, separated by dots.
842
 * The output buffer must be deallocated by the caller.
843
 *
844
 * The default behavior of this function (when flags are zero) is to apply
845
 * the IDNA2008 rules without the TR46 amendments. As the TR46
846
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
847
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
848
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
849
 *
850
 * Return value: Returns %IDN2_OK on success, or error code.
851
 *
852
 * Since: 2.0.0
853
 **/
854
int
855
idn2_to_ascii_4z (const uint32_t * input, char **output, int flags)
856
0
{
857
0
  uint8_t *input_u8;
858
0
  size_t length;
859
0
  int rc;
860
861
0
  if (!input)
862
0
    {
863
0
      if (output)
864
0
  *output = NULL;
865
0
      return IDN2_OK;
866
0
    }
867
868
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
869
0
  if (!input_u8)
870
0
    {
871
0
      if (errno == ENOMEM)
872
0
  return IDN2_MALLOC;
873
0
      return IDN2_ENCODING_ERROR;
874
0
    }
875
876
0
  rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
877
0
  free (input_u8);
878
879
0
  return rc;
880
0
}
881
882
/**
 * idn2_to_ascii_8z:
 * @input: zero terminated input UTF-8 string.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * This is a thin wrapper around idn2_lookup_u8() that only adapts the
 * pointer types.  When @flags contains neither %IDN2_NO_TR46 nor
 * %IDN2_TRANSITIONAL, TR46 non-transitional processing
 * (%IDN2_NONTRANSITIONAL) is applied; pass %IDN2_NO_TR46 to skip the
 * TR46 step.
 *
 * Return value: Returns %IDN2_OK on success, or error code.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  const uint8_t *input_u8 = (const uint8_t *) input;
  uint8_t **output_u8 = (uint8_t **) output;

  return idn2_lookup_u8 (input_u8, output_u8, flags);
}
907
908
/**
 * idn2_to_ascii_lz:
 * @input: zero terminated input string in the locale's encoding.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * This is a thin wrapper around idn2_lookup_ul(), which always adds
 * %IDN2_NFC_INPUT to @flags.  When @flags contains neither
 * %IDN2_NO_TR46 nor %IDN2_TRANSITIONAL, TR46 non-transitional
 * processing (%IDN2_NONTRANSITIONAL) is applied; pass %IDN2_NO_TR46 to
 * skip the TR46 step.
 *
 * Returns: %IDN2_OK on success, or error code.
 * Same as described in idn2_lookup_ul() documentation.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  int rc = idn2_lookup_ul (input, output, flags);

  return rc;
}