Coverage Report

Created: 2024-10-06 07:21

/src/libidn2/lib/lookup.c
Line
Count
Source (jump to first uncovered line)
1
/* lookup.c - implementation of IDNA2008 lookup functions
2
   Copyright (C) 2011-2024 Simon Josefsson
3
   Copyright (C) 2017-2024 Tim Ruehsen
4
5
   Libidn2 is free software: you can redistribute it and/or modify it
6
   under the terms of either:
7
8
     * the GNU Lesser General Public License as published by the Free
9
       Software Foundation; either version 3 of the License, or (at
10
       your option) any later version.
11
12
   or
13
14
     * the GNU General Public License as published by the Free
15
       Software Foundation; either version 2 of the License, or (at
16
       your option) any later version.
17
18
   or both in parallel, as here.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License for more details.
24
25
   You should have received copies of the GNU General Public License and
26
   the GNU Lesser General Public License along with this program.  If
27
   not, see <http://www.gnu.org/licenses/>.
28
*/
29
30
#include <config.h>
31
32
#include "idn2.h"
33
34
#include <errno.h>    /* errno */
35
#include <stdlib.h>   /* malloc, free */
36
37
#include <unitypes.h>
38
#include <uniconv.h>    /* u8_strconv_from_locale */
39
#include <uninorm.h>    /* u32_normalize */
40
#include <unistr.h>   /* u8_to_u32 */
41
42
#include "idna.h"   /* _idn2_label_test */
43
#include "tr46map.h"    /* definition for tr46map.c */
44
45
#ifdef HAVE_LIBUNISTRING
46
/* copied from gnulib */
47
# include <limits.h>
48
/* Case labels for the 26 lower-case letters, each shifted by N.  The
   letters are listed one by one rather than tested as a range, so the
   expansion does not rely on the letters being contiguous in the
   execution character set.  */
# define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): case 'g' + (N): case 'h' + (N): \
   case 'i' + (N): case 'j' + (N): case 'k' + (N): case 'l' + (N): \
   case 'm' + (N): case 'n' + (N): case 'o' + (N): case 'p' + (N): \
   case 'q' + (N): case 'r' + (N): case 's' + (N): case 't' + (N): \
   case 'u' + (N): case 'v' + (N): case 'w' + (N): case 'x' + (N): \
   case 'y' + (N): case 'z' + (N)

/* Case labels for the 26 upper-case letters.  */
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Locale-independent lower-casing: return the lower-case counterpart of
   C when C is one of the upper-case letters 'A'..'Z', otherwise return
   C unchanged.  */
static inline int
c_tolower (int c)
{
  int lowered = c;

  switch (c)
    {
    _C_CTYPE_UPPER:
      lowered = c - 'A' + 'a';
      break;

    default:
      break;
    }

  return lowered;
}
68
69
/* Compare at most N bytes of S1 and S2, treating ASCII upper- and
   lower-case letters as equal (each byte goes through c_tolower).
   The scan stops at the first pair of bytes that differ, at a NUL byte
   in S1, or once N bytes have been examined.  The result is zero when
   the compared bytes match, negative when S1 sorts first and positive
   when S2 sorts first.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *) s1;
  const unsigned char *rhs = (const unsigned char *) s2;
  unsigned char a, b;

  /* Identical pointers or nothing to compare: trivially equal.  */
  if (lhs == rhs || n == 0)
    return 0;

  for (;;)
    {
      a = c_tolower (*lhs);
      b = c_tolower (*rhs);

      /* Stop when the budget is used up, S1 ended, or a mismatch was
         found; A and B then hold the deciding pair.  */
      if (--n == 0 || a == '\0' || a != b)
        break;

      ++lhs;
      ++rhs;
    }

  if (UCHAR_MAX <= INT_MAX)
    return a - b;

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so only report the ordering.  */
  return (a > b) - (a < b);
}
100
#else
101
# include <c-strcase.h>
102
#endif
103
104
static int
105
set_default_flags (int *flags)
106
8.15M
{
107
8.15M
  if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
108
0
    return IDN2_INVALID_FLAGS;
109
110
8.15M
  if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
111
8.15M
      && ((*flags) & IDN2_NO_TR46))
112
0
    return IDN2_INVALID_FLAGS;
113
114
8.15M
  if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
115
8.15M
      && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
116
0
    return IDN2_INVALID_FLAGS;
117
118
8.15M
  if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
119
8.15M
    *flags |= IDN2_NONTRANSITIONAL;
120
121
8.15M
  return IDN2_OK;
122
8.15M
}
123
124
static int
125
label (const uint8_t *src, size_t srclen, uint8_t *dst, size_t *dstlen,
126
       int flags)
127
8.32M
{
128
8.32M
  size_t plen;
129
8.32M
  uint32_t *p = NULL;
130
8.32M
  const uint8_t *src_org = NULL;
131
8.32M
  uint8_t *src_allocated = NULL;
132
8.32M
  int rc, check_roundtrip = 0;
133
8.32M
  size_t tmpl, srclen_org = 0;
134
8.32M
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
135
8.32M
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;
136
137
8.32M
  if (_idn2_ascii_p (src, srclen))
138
4.07M
    {
139
4.07M
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
140
4.07M
    && memcmp (src, "xn--", 4) == 0)
141
1.04k
  {
142
    /*
143
       If the input to this procedure appears to be an A-label
144
       (i.e., it starts in "xn--", interpreted
145
       case-insensitively), the lookup application MAY attempt to
146
       convert it to a U-label, first ensuring that the A-label is
147
       entirely in lowercase (converting it to lowercase if
148
       necessary), and apply the tests of Section 5.4 and the
149
       conversion of Section 5.5 to that form. */
150
1.04k
    rc = idn2_punycode_decode ((char *) src + 4, srclen - 4,
151
1.04k
             label_u32, &label32_len);
152
1.04k
    if (rc)
153
0
      return rc;
154
155
1.04k
    check_roundtrip = 1;
156
1.04k
    src_org = src;
157
1.04k
    srclen_org = srclen;
158
159
1.04k
    srclen = IDN2_LABEL_MAX_LENGTH;
160
1.04k
    src = src_allocated =
161
1.04k
      u32_to_u8 (label_u32, label32_len, NULL, &srclen);
162
1.04k
    if (!src)
163
0
      {
164
0
        if (errno == ENOMEM)
165
0
    return IDN2_MALLOC;
166
0
        return IDN2_ENCODING_ERROR;
167
0
      }
168
1.04k
  }
169
4.07M
      else
170
4.07M
  {
171
4.07M
    if (srclen > IDN2_LABEL_MAX_LENGTH)
172
236
      return IDN2_TOO_BIG_LABEL;
173
4.07M
    if (srclen > *dstlen)
174
0
      return IDN2_TOO_BIG_DOMAIN;
175
176
4.07M
    memcpy (dst, src, srclen);
177
4.07M
    *dstlen = srclen;
178
4.07M
    return IDN2_OK;
179
4.07M
  }
180
4.07M
    }
181
182
4.25M
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
183
4.25M
  if (rc != IDN2_OK)
184
0
    goto out;
185
186
4.25M
  if (!(flags & IDN2_TRANSITIONAL))
187
4.25M
    {
188
4.25M
      rc = _idn2_label_test (TEST_NFC |
189
4.25M
           TEST_2HYPHEN |
190
4.25M
           TEST_LEADING_COMBINING |
191
4.25M
           TEST_DISALLOWED |
192
4.25M
           TEST_CONTEXTJ_RULE |
193
4.25M
           TEST_CONTEXTO_WITH_RULE |
194
4.25M
           TEST_UNASSIGNED | TEST_BIDI |
195
4.25M
           ((flags & IDN2_NONTRANSITIONAL) ?
196
4.25M
            TEST_NONTRANSITIONAL : 0) | ((flags &
197
4.25M
                  IDN2_USE_STD3_ASCII_RULES)
198
4.25M
                 ? 0 :
199
4.25M
                 TEST_ALLOW_STD3_DISALLOWED),
200
4.25M
           p, plen);
201
202
4.25M
      if (rc != IDN2_OK)
203
18.4k
  goto out;
204
4.25M
    }
205
206
4.23M
  dst[0] = 'x';
207
4.23M
  dst[1] = 'n';
208
4.23M
  dst[2] = '-';
209
4.23M
  dst[3] = '-';
210
211
4.23M
  tmpl = *dstlen - 4;
212
4.23M
  rc = idn2_punycode_encode (p, plen, (char *) dst + 4, &tmpl);
213
4.23M
  if (rc != IDN2_OK)
214
948
    goto out;
215
216
217
4.23M
  *dstlen = 4 + tmpl;
218
219
4.23M
  if (check_roundtrip)
220
932
    {
221
932
      if (srclen_org != *dstlen
222
932
    || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
223
503
  {
224
503
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
225
503
    goto out;
226
503
  }
227
932
    }
228
4.22M
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
229
4.22M
    {
230
4.22M
      rc = idn2_punycode_decode ((char *) dst + 4, *dstlen - 4,
231
4.22M
         label_u32, &label32_len);
232
4.22M
      if (rc)
233
7.80k
  {
234
7.80k
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
235
7.80k
    goto out;
236
7.80k
  }
237
238
4.22M
      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
239
0
  {
240
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
241
0
    goto out;
242
0
  }
243
4.22M
    }
244
245
4.22M
  rc = IDN2_OK;
246
247
4.25M
out:
248
4.25M
  free (p);
249
4.25M
  free (src_allocated);
250
4.25M
  return rc;
251
4.22M
}
252
253
#define TR46_TRANSITIONAL_CHECK \
254
0
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
255
#define TR46_NONTRANSITIONAL_CHECK \
256
9.79M
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
257
258
static int
259
_tr46 (const uint8_t *domain_u8, uint8_t **out, int flags)
260
8.15M
{
261
8.15M
  size_t len, it;
262
8.15M
  uint32_t *domain_u32;
263
8.15M
  int err = IDN2_OK, rc;
264
8.15M
  int transitional = 0;
265
8.15M
  int test_flags;
266
267
8.15M
  if (flags & IDN2_TRANSITIONAL)
268
0
    transitional = 1;
269
270
  /* convert UTF-8 to UTF-32 */
271
8.15M
  if (!(domain_u32 =
272
8.15M
  u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
273
0
    {
274
0
      if (errno == ENOMEM)
275
0
  return IDN2_MALLOC;
276
0
      return IDN2_ENCODING_ERROR;
277
0
    }
278
279
8.15M
  size_t len2 = 0;
280
17.2M
  for (it = 0; it < len - 1; it++)
281
9.63M
    {
282
9.63M
      IDNAMap map;
283
284
9.63M
      get_idna_map (domain_u32[it], &map);
285
286
9.63M
      if (map_is (&map, TR46_FLG_DISALLOWED))
287
503k
  {
288
503k
    if (domain_u32[it])
289
503k
      {
290
503k
        free (domain_u32);
291
503k
        return IDN2_DISALLOWED;
292
503k
      }
293
0
    len2++;
294
0
  }
295
9.13M
      else if (map_is (&map, TR46_FLG_MAPPED))
296
3.41k
  {
297
3.41k
    len2 += map.nmappings;
298
3.41k
  }
299
9.13M
      else if (map_is (&map, TR46_FLG_VALID))
300
6.01M
  {
301
6.01M
    len2++;
302
6.01M
  }
303
3.12M
      else if (map_is (&map, TR46_FLG_IGNORED))
304
3.04M
  {
305
3.04M
    continue;
306
3.04M
  }
307
75.1k
      else if (map_is (&map, TR46_FLG_DEVIATION))
308
5.05k
  {
309
5.05k
    if (transitional)
310
0
      {
311
0
        len2 += map.nmappings;
312
0
      }
313
5.05k
    else
314
5.05k
      len2++;
315
5.05k
  }
316
70.0k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
317
70.0k
  {
318
70.0k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
319
70.0k
      {
320
        /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
321
70.0k
        len2++;
322
70.0k
      }
323
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
324
0
      {
325
        /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
326
0
        len2 += map.nmappings;
327
0
      }
328
70.0k
  }
329
9.63M
    }
330
331
  /* Exit early if result is too long.
332
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
333
7.65M
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
334
199
    {
335
199
      free (domain_u32);
336
199
      return IDN2_TOO_BIG_DOMAIN;
337
199
    }
338
339
7.65M
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
340
7.65M
  if (!tmp)
341
0
    {
342
0
      free (domain_u32);
343
0
      return IDN2_MALLOC;
344
0
    }
345
346
7.65M
  len2 = 0;
347
16.6M
  for (it = 0; it < len - 1; it++)
348
9.02M
    {
349
9.02M
      uint32_t c = domain_u32[it];
350
9.02M
      IDNAMap map;
351
352
9.02M
      get_idna_map (c, &map);
353
354
9.02M
      if (map_is (&map, TR46_FLG_DISALLOWED))
355
0
  {
356
0
    tmp[len2++] = c;
357
0
  }
358
9.02M
      else if (map_is (&map, TR46_FLG_MAPPED))
359
3.40k
  {
360
3.40k
    len2 += get_map_data (tmp + len2, &map);
361
3.40k
  }
362
9.02M
      else if (map_is (&map, TR46_FLG_VALID))
363
5.91M
  {
364
5.91M
    tmp[len2++] = c;
365
5.91M
  }
366
3.10M
      else if (map_is (&map, TR46_FLG_IGNORED))
367
3.04M
  {
368
3.04M
    continue;
369
3.04M
  }
370
60.7k
      else if (map_is (&map, TR46_FLG_DEVIATION))
371
4.92k
  {
372
4.92k
    if (transitional)
373
0
      {
374
0
        len2 += get_map_data (tmp + len2, &map);
375
0
      }
376
4.92k
    else
377
4.92k
      tmp[len2++] = c;
378
4.92k
  }
379
55.8k
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
380
55.8k
  {
381
55.8k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
382
55.8k
      {
383
55.8k
        tmp[len2++] = c;
384
55.8k
      }
385
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
386
0
      {
387
0
        len2 += get_map_data (tmp + len2, &map);
388
0
      }
389
55.8k
  }
390
9.02M
    }
391
7.65M
  free (domain_u32);
392
393
  /* Normalize to NFC */
394
7.65M
  tmp[len2] = 0;
395
7.65M
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
396
7.65M
  free (tmp);
397
7.65M
  tmp = NULL;
398
399
7.65M
  if (!domain_u32)
400
0
    {
401
0
      if (errno == ENOMEM)
402
0
  return IDN2_MALLOC;
403
0
      return IDN2_ENCODING_ERROR;
404
0
    }
405
406
  /* split into labels and check */
407
7.65M
  uint32_t *e, *s;
408
12.5M
  for (e = s = domain_u32; *e; s = e)
409
4.90M
    {
410
10.1M
      while (*e && *e != '.')
411
5.29M
  e++;
412
413
4.90M
      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
414
4.90M
    && s[3] == '-')
415
4.82k
  {
416
    /* decode punycode and check result non-transitional */
417
4.82k
    size_t ace_len;
418
4.82k
    uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
419
4.82k
    size_t name_len = IDN2_LABEL_MAX_LENGTH;
420
4.82k
    uint8_t *ace;
421
422
4.82k
    ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
423
4.82k
    if (!ace)
424
0
      {
425
0
        free (domain_u32);
426
0
        if (errno == ENOMEM)
427
0
    return IDN2_MALLOC;
428
0
        return IDN2_ENCODING_ERROR;
429
0
      }
430
431
4.82k
    rc = idn2_punycode_decode ((char *) ace, ace_len,
432
4.82k
             name_u32, &name_len);
433
434
4.82k
    free (ace);
435
436
4.82k
    if (rc)
437
2.83k
      {
438
2.83k
        free (domain_u32);
439
2.83k
        return rc;
440
2.83k
      }
441
442
1.99k
    test_flags = TR46_NONTRANSITIONAL_CHECK;
443
444
1.99k
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
445
1.99k
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
446
447
1.99k
    if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
448
945
      err = rc;
449
1.99k
  }
450
4.89M
      else
451
4.89M
  {
452
4.89M
    test_flags =
453
4.89M
      transitional ? TR46_TRANSITIONAL_CHECK :
454
4.89M
      TR46_NONTRANSITIONAL_CHECK;
455
456
4.89M
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
457
4.89M
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
458
459
4.89M
    if ((rc = _idn2_label_test (test_flags, s, e - s)))
460
5.60k
      err = rc;
461
4.89M
  }
462
463
4.89M
      if (*e)
464
690k
  e++;
465
4.89M
    }
466
467
7.64M
  if (err == IDN2_OK && out)
468
7.64M
    {
469
7.64M
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
470
7.64M
      free (domain_u32);
471
472
7.64M
      if (!_out)
473
0
  {
474
0
    if (errno == ENOMEM)
475
0
      return IDN2_MALLOC;
476
0
    return IDN2_ENCODING_ERROR;
477
0
  }
478
479
7.64M
      *out = _out;
480
7.64M
    }
481
5.57k
  else
482
5.57k
    free (domain_u32);
483
484
7.64M
  return err;
485
7.64M
}
486
487
/**
488
 * idn2_lookup_u8:
489
 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
490
 * @lookupname: newly allocated output variable with name to lookup in DNS.
491
 * @flags: optional #idn2_flags to modify behaviour.
492
 *
493
 * Perform IDNA2008 lookup string conversion on domain name @src, as
494
 * described in section 5 of RFC 5891.  Note that the input string
495
 * must be encoded in UTF-8 and be in Unicode NFC form.
496
 *
497
 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
498
 * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
499
 * do already imply %IDN2_NFC_INPUT.
500
 *
501
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
502
 * convert any input A-labels to U-labels and perform additional
503
 * testing. This is default since version 2.2.
504
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
505
 *
506
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
507
 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
508
 * Unicode TR46 non-transitional processing.
509
 *
510
 * Multiple flags may be specified by binary or:ing them together.
511
 *
512
 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
513
 * Previously we were eliminating non-STD3 characters from domain strings
514
 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
515
 * functions. That was an unexpected regression for applications switching
516
 * from libidn and thus it is no longer applied by default.
517
 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
518
 *
519
 * After version 0.11: @lookupname may be NULL to test lookup of @src
520
 * without allocating memory.
521
 *
522
 * Returns: On successful conversion %IDN2_OK is returned, if the
523
 *   output domain or any label would have been too long
524
 *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
525
 *   another error code is returned.
526
 *
527
 * Since: 0.1
528
 **/
529
int
530
idn2_lookup_u8 (const uint8_t *src, uint8_t **lookupname, int flags)
531
8.15M
{
532
8.15M
  size_t lookupnamelen = 0;
533
8.15M
  uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
534
8.15M
  uint8_t *src_allocated = NULL;
535
8.15M
  int rc;
536
537
8.15M
  if (src == NULL)
538
0
    {
539
0
      if (lookupname)
540
0
  *lookupname = NULL;
541
0
      return IDN2_OK;
542
0
    }
543
544
8.15M
  rc = set_default_flags (&flags);
545
8.15M
  if (rc != IDN2_OK)
546
0
    return rc;
547
548
8.15M
  if (!(flags & IDN2_NO_TR46))
549
8.15M
    {
550
8.15M
      uint8_t *out = NULL;
551
552
8.15M
      rc = _tr46 (src, &out, flags);
553
8.15M
      if (rc != IDN2_OK)
554
511k
  return rc;
555
556
7.64M
      src = src_allocated = out;
557
7.64M
    }
558
559
7.64M
  do
560
8.32M
    {
561
8.32M
      const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
562
      /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
563
         and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
564
8.32M
      size_t labellen = end - src;
565
8.32M
      uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
566
8.32M
      size_t tmplen = IDN2_LABEL_MAX_LENGTH;
567
568
8.32M
      rc = label (src, labellen, tmp, &tmplen, flags);
569
8.32M
      if (rc != IDN2_OK)
570
27.9k
  {
571
27.9k
    free (src_allocated);
572
27.9k
    return rc;
573
27.9k
  }
574
575
8.30M
      if (lookupnamelen + tmplen
576
8.30M
    > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
577
202
  {
578
202
    free (src_allocated);
579
202
    return IDN2_TOO_BIG_DOMAIN;
580
202
  }
581
582
8.30M
      memcpy (_lookupname + lookupnamelen, tmp, tmplen);
583
8.30M
      lookupnamelen += tmplen;
584
585
8.30M
      if (*end == '.')
586
685k
  {
587
685k
    if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
588
0
      {
589
0
        free (src_allocated);
590
0
        return IDN2_TOO_BIG_DOMAIN;
591
0
      }
592
593
685k
    _lookupname[lookupnamelen] = '.';
594
685k
    lookupnamelen++;
595
685k
  }
596
8.30M
      _lookupname[lookupnamelen] = '\0';
597
598
8.30M
      src = end;
599
8.30M
    }
600
8.30M
  while (*src++);
601
602
7.61M
  free (src_allocated);
603
604
7.61M
  if (lookupname)
605
7.61M
    {
606
7.61M
      uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
607
608
7.61M
      if (tmp == NULL)
609
0
  return IDN2_MALLOC;
610
611
7.61M
      memcpy (tmp, _lookupname, lookupnamelen + 1);
612
7.61M
      *lookupname = tmp;
613
7.61M
    }
614
615
7.61M
  return IDN2_OK;
616
7.61M
}
617
618
/**
619
 * idn2_lookup_ul:
620
 * @src: input zero-terminated locale encoded string.
621
 * @lookupname: newly allocated output variable with name to lookup in DNS.
622
 * @flags: optional #idn2_flags to modify behaviour.
623
 *
624
 * Perform IDNA2008 lookup string conversion on domain name @src, as
625
 * described in section 5 of RFC 5891.  Note that the input is assumed
626
 * to be encoded in the locale's default coding system, and will be
627
 * transcoded to UTF-8 and NFC normalized by this function.
628
 *
629
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
630
 * convert any input A-labels to U-labels and perform additional
631
 * testing. This is default since version 2.2.
632
 * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
633
 *
634
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
635
 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
636
 * processing.
637
 *
638
 * Multiple flags may be specified by binary or:ing them together, for
639
 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
640
 *
641
 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
642
 *
643
 * After version 0.11: @lookupname may be NULL to test lookup of @src
644
 * without allocating memory.
645
 *
646
 * Returns: On successful conversion %IDN2_OK is returned, if
647
 *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
648
 *   returned, if the output domain or any label would have been too
649
 *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
650
 *   another error code is returned.
651
 *
652
 * Since: 0.1
653
 **/
654
int
655
idn2_lookup_ul (const char *src, char **lookupname, int flags)
656
0
{
657
0
  uint8_t *utf8src = NULL;
658
0
  int rc;
659
660
0
  if (src)
661
0
    {
662
0
      const char *encoding = locale_charset ();
663
664
0
      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
665
666
0
      if (!utf8src)
667
0
  {
668
0
    if (errno == ENOMEM)
669
0
      return IDN2_MALLOC;
670
0
    return IDN2_ICONV_FAIL;
671
0
  }
672
0
    }
673
674
0
  rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
675
0
           flags | IDN2_NFC_INPUT);
676
677
0
  free (utf8src);
678
679
0
  return rc;
680
0
}
681
682
/**
683
 * idn2_to_ascii_4i:
684
 * @input: zero terminated input Unicode (UCS-4) string.
685
 * @inlen: number of elements in @input.
686
 * @output: output zero terminated string that must have room for at
687
 *       least 63 characters plus the terminating zero.
688
 * @flags: optional #idn2_flags to modify behaviour.
689
 *
690
 * The ToASCII operation takes a sequence of Unicode code points that make
691
 * up one domain label and transforms it into a sequence of code points in
692
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
693
 * the resulting sequence are equivalent labels.
694
 *
695
 * It is important to note that the ToASCII operation can fail.
696
 * ToASCII fails if any step of it fails. If any step of the
697
 * ToASCII operation fails on any label in a domain name, that domain
698
 * name MUST NOT be used as an internationalized domain name.
699
 * The method for dealing with this failure is application-specific.
700
 *
701
 * The inputs to ToASCII are a sequence of code points.
702
 *
703
 * ToASCII never alters a sequence of code points that are all in the ASCII
704
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
705
 * times has exactly the same effect as applying it just once.
706
 *
707
 * The default behavior of this function (when flags are zero) is to apply
708
 * the IDNA2008 rules without the TR46 amendments. As the TR46
709
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
710
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
711
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
712
 *
713
 * Warning: With version 2.1.1 until before version 2.3.5 this
714
 * function was deprecated in favor of idn2_to_ascii_4i2().  We still
715
 * encourage you to use idn2_to_ascii_4i2() when appropriate.
716
 *
717
 * Returns: On successful conversion %IDN2_OK is returned; if the
718
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
719
 *   returned, or another error code is returned.
720
 *
721
 * Since: 2.0.0
722
 **/
723
int
724
idn2_to_ascii_4i (const uint32_t *input, size_t inlen, char *output,
725
      int flags)
726
0
{
727
0
  char *out;
728
0
  int rc;
729
730
0
  if (!input)
731
0
    {
732
0
      if (output)
733
0
  *output = 0;
734
0
      return IDN2_OK;
735
0
    }
736
737
0
  rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
738
0
  if (rc == IDN2_OK)
739
0
    {
740
0
      size_t len = strlen (out);
741
742
0
      if (len > IDN2_LABEL_MAX_LENGTH)
743
0
  rc = IDN2_TOO_BIG_LABEL;
744
0
      else if (output)
745
0
  strcpy (output, out);
746
747
0
      free (out);
748
0
    }
749
750
0
  return rc;
751
0
}
752
753
/**
754
 * idn2_to_ascii_4i2:
755
 * @input: zero terminated input Unicode (UCS-4) string.
756
 * @inlen: number of elements in @input.
757
 * @output: pointer to newly allocated zero-terminated output string.
758
 * @flags: optional #idn2_flags to modify behaviour.
759
 *
760
 * The ToASCII operation takes a sequence of Unicode code points that make
761
 * up one domain label and transforms it into a sequence of code points in
762
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
763
 * the resulting sequence are equivalent labels.
764
 *
765
 * It is important to note that the ToASCII operation can fail.
766
 * ToASCII fails if any step of it fails. If any step of the
767
 * ToASCII operation fails on any label in a domain name, that domain
768
 * name MUST NOT be used as an internationalized domain name.
769
 * The method for dealing with this failure is application-specific.
770
 *
771
 * The inputs to ToASCII are a sequence of code points.
772
 *
773
 * ToASCII never alters a sequence of code points that are all in the ASCII
774
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
775
 * times has exactly the same effect as applying it just once.
776
 *
777
 * The default behavior of this function (when flags are zero) is to apply
778
 * the IDNA2008 rules without the TR46 amendments. As the TR46
779
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
780
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
781
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
782
 *
783
 * Returns: On successful conversion %IDN2_OK is returned; if the
784
 *   output label would have been too long %IDN2_TOO_BIG_LABEL is
785
 *   returned, or another error code is returned.
786
 *
787
 * Since: 2.1.1
788
 **/
789
int
790
idn2_to_ascii_4i2 (const uint32_t *input, size_t inlen, char **output,
791
       int flags)
792
0
{
793
0
  uint32_t *input_u32;
794
0
  uint8_t *input_u8, *output_u8;
795
0
  size_t length;
796
0
  int rc;
797
798
0
  if (!input)
799
0
    {
800
0
      if (output)
801
0
  *output = NULL;
802
0
      return IDN2_OK;
803
0
    }
804
805
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
806
0
  if (!input_u32)
807
0
    return IDN2_MALLOC;
808
809
0
  u32_cpy (input_u32, input, inlen);
810
0
  input_u32[inlen] = 0;
811
812
0
  input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
813
0
  free (input_u32);
814
0
  if (!input_u8)
815
0
    {
816
0
      if (errno == ENOMEM)
817
0
  return IDN2_MALLOC;
818
0
      return IDN2_ENCODING_ERROR;
819
0
    }
820
821
0
  rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
822
0
  free (input_u8);
823
824
0
  if (rc == IDN2_OK)
825
0
    {
826
0
      if (output)
827
0
  *output = (char *) output_u8;
828
0
      else
829
0
  free (output_u8);
830
0
    }
831
832
0
  return rc;
833
0
}
834
835
/**
836
 * idn2_to_ascii_4z:
837
 * @input: zero terminated input Unicode (UCS-4) string.
838
 * @output: pointer to newly allocated zero-terminated output string.
839
 * @flags: optional #idn2_flags to modify behaviour.
840
 *
841
 * Convert UCS-4 domain name to ASCII string using the IDNA2008
842
 * rules.  The domain name may contain several labels, separated by dots.
843
 * The output buffer must be deallocated by the caller.
844
 *
845
 * The default behavior of this function (when flags are zero) is to apply
846
 * the IDNA2008 rules without the TR46 amendments. As the TR46
847
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
848
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
849
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
850
 *
851
 * Return value: Returns %IDN2_OK on success, or error code.
852
 *
853
 * Since: 2.0.0
854
 **/
855
int
856
idn2_to_ascii_4z (const uint32_t *input, char **output, int flags)
857
0
{
858
0
  uint8_t *input_u8;
859
0
  size_t length;
860
0
  int rc;
861
862
0
  if (!input)
863
0
    {
864
0
      if (output)
865
0
  *output = NULL;
866
0
      return IDN2_OK;
867
0
    }
868
869
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
870
0
  if (!input_u8)
871
0
    {
872
0
      if (errno == ENOMEM)
873
0
  return IDN2_MALLOC;
874
0
      return IDN2_ENCODING_ERROR;
875
0
    }
876
877
0
  rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
878
0
  free (input_u8);
879
880
0
  return rc;
881
0
}
882
883
/**
884
 * idn2_to_ascii_8z:
885
 * @input: zero terminated input UTF-8 string.
886
 * @output: pointer to newly allocated output string.
887
 * @flags: optional #idn2_flags to modify behaviour.
888
 *
889
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
890
 * rules.  The domain name may contain several labels, separated by dots.
891
 * The output buffer must be deallocated by the caller.
892
 *
893
 * The default behavior of this function (when flags are zero) is to apply
894
 * the IDNA2008 rules without the TR46 amendments. As the TR46
895
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
896
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
897
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
898
 *
899
 * Return value: Returns %IDN2_OK on success, or error code.
900
 *
901
 * Since: 2.0.0
902
 **/
903
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  /* Same operation as idn2_lookup_u8(); only the pointer types differ.  */
  const uint8_t *utf8_in = (const uint8_t *) input;
  uint8_t **ascii_out = (uint8_t **) output;

  return idn2_lookup_u8 (utf8_in, ascii_out, flags);
}
908
909
/**
910
 * idn2_to_ascii_lz:
911
 * @input: zero terminated input string in the locale's encoding.
912
 * @output: pointer to newly allocated output string.
913
 * @flags: optional #idn2_flags to modify behaviour.
914
 *
915
 * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
916
 * rules.  The domain name may contain several labels, separated by dots.
917
 * The output buffer must be deallocated by the caller.
918
 *
919
 * The default behavior of this function (when flags are zero) is to apply
920
 * the IDNA2008 rules without the TR46 amendments. As the TR46
921
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
922
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
923
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
924
 *
925
 * Returns: %IDN2_OK on success, or error code.
926
 * Same as described in idn2_lookup_ul() documentation.
927
 *
928
 * Since: 2.0.0
929
 **/
930
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  /* Plain forwarder to idn2_lookup_ul(), which does the transcoding
     from the locale's encoding.  */
  int status = idn2_lookup_ul (input, output, flags);

  return status;
}