Coverage Report

Created: 2023-03-26 07:33

/src/libidn2/lib/lookup.c
Line
Count
Source (jump to first uncovered line)
1
/* lookup.c - implementation of IDNA2008 lookup functions
2
   Copyright (C) 2011-2022 Simon Josefsson
3
   Copyright (C) 2017-2022 Tim Ruehsen
4
5
   Libidn2 is free software: you can redistribute it and/or modify it
6
   under the terms of either:
7
8
     * the GNU Lesser General Public License as published by the Free
9
       Software Foundation; either version 3 of the License, or (at
10
       your option) any later version.
11
12
   or
13
14
     * the GNU General Public License as published by the Free
15
       Software Foundation; either version 2 of the License, or (at
16
       your option) any later version.
17
18
   or both in parallel, as here.
19
20
   This program is distributed in the hope that it will be useful,
21
   but WITHOUT ANY WARRANTY; without even the implied warranty of
22
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
   GNU General Public License for more details.
24
25
   You should have received copies of the GNU General Public License and
26
   the GNU Lesser General Public License along with this program.  If
27
   not, see <http://www.gnu.org/licenses/>.
28
*/
29
30
#include <config.h>
31
32
#include "idn2.h"
33
34
#include <errno.h>    /* errno */
35
#include <stdlib.h>   /* malloc, free */
36
37
#include "punycode.h"
38
39
#include <unitypes.h>
40
#include <uniconv.h>    /* u8_strconv_from_locale */
41
#include <uninorm.h>    /* u32_normalize */
42
#include <unistr.h>   /* u8_to_u32 */
43
44
#include "idna.h"   /* _idn2_label_test */
45
#include "tr46map.h"    /* definition for tr46map.c */
46
47
#ifdef HAVE_LIBUNISTRING
48
/* copied from gnulib */
49
# include <limits.h>
50
/* Expands to the 26 `case` labels 'a' + N ... 'z' + N.  It is meant to be
   followed by a ':' inside a switch statement.  */
# define _C_CTYPE_LOWER_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
   case 'e' + (N): case 'f' + (N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
   case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
   case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
   case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
/* The same labels shifted by 'A' - 'a', i.e. the uppercase letters.  */
# define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')

/* Locale-independent replacement for tolower().
   Returns the lowercase counterpart of C when C is one of the uppercase
   letters 'A'..'Z'; every other value is returned unchanged.  */
static inline int
c_tolower (int c)
{
  switch (c)
    {
    _C_CTYPE_UPPER:
      return c - 'A' + 'a';
    default:
      return c;
    }
}
70
71
/* Compare at most N bytes of S1 and S2, ignoring the case of the letters
   'A'..'Z' (via c_tolower), independently of the current locale.

   Returns 0 when the pointers are identical, when N is 0, or when the
   compared prefixes are equal; otherwise a negative or positive value
   according to the first differing (lowercased) byte.  Comparison stops
   at the first NUL byte in S1.  */
static int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  /* Compare as unsigned bytes so that bytes >= 0x80 order consistently.  */
  const unsigned char *p1 = (const unsigned char *) s1;
  const unsigned char *p2 = (const unsigned char *) s2;
  unsigned char c1, c2;

  if (p1 == p2 || n == 0)
    return 0;

  do
    {
      c1 = c_tolower (*p1);
      c2 = c_tolower (*p2);

      /* Stop after N bytes or at the end of S1; c1/c2 keep the last pair
         that was looked at, which decides the result below.  */
      if (--n == 0 || c1 == '\0')
        break;

      ++p1;
      ++p2;
    }
  while (c1 == c2);

  if (UCHAR_MAX <= INT_MAX)
    return c1 - c2;
  else
    /* On machines where 'char' and 'int' are types of the same size, the
       difference of two 'unsigned char' values - including the sign bit -
       doesn't fit in an 'int'.  */
    return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
}
102
#else
103
# include <c-strcase.h>
104
#endif
105
106
static int
107
set_default_flags (int *flags)
108
0
{
109
0
  if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
110
0
    return IDN2_INVALID_FLAGS;
111
112
0
  if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
113
0
      && ((*flags) & IDN2_NO_TR46))
114
0
    return IDN2_INVALID_FLAGS;
115
116
0
  if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
117
0
      && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
118
0
    return IDN2_INVALID_FLAGS;
119
120
0
  if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
121
0
    *flags |= IDN2_NONTRANSITIONAL;
122
123
0
  return IDN2_OK;
124
0
}
125
126
static int
127
label (const uint8_t * src, size_t srclen, uint8_t * dst, size_t *dstlen,
128
       int flags)
129
0
{
130
0
  size_t plen;
131
0
  uint32_t *p = NULL;
132
0
  const uint8_t *src_org = NULL;
133
0
  uint8_t *src_allocated = NULL;
134
0
  int rc, check_roundtrip = 0;
135
0
  size_t tmpl, srclen_org = 0;
136
0
  uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
137
0
  size_t label32_len = IDN2_LABEL_MAX_LENGTH;
138
139
0
  if (_idn2_ascii_p (src, srclen))
140
0
    {
141
0
      if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
142
0
    && memcmp (src, "xn--", 4) == 0)
143
0
  {
144
    /*
145
       If the input to this procedure appears to be an A-label
146
       (i.e., it starts in "xn--", interpreted
147
       case-insensitively), the lookup application MAY attempt to
148
       convert it to a U-label, first ensuring that the A-label is
149
       entirely in lowercase (converting it to lowercase if
150
       necessary), and apply the tests of Section 5.4 and the
151
       conversion of Section 5.5 to that form. */
152
0
    rc =
153
0
      _idn2_punycode_decode_internal (srclen - 4, (char *) src + 4,
154
0
              &label32_len, label_u32);
155
0
    if (rc)
156
0
      return rc;
157
158
0
    check_roundtrip = 1;
159
0
    src_org = src;
160
0
    srclen_org = srclen;
161
162
0
    srclen = IDN2_LABEL_MAX_LENGTH;
163
0
    src = src_allocated =
164
0
      u32_to_u8 (label_u32, label32_len, NULL, &srclen);
165
0
    if (!src)
166
0
      {
167
0
        if (errno == ENOMEM)
168
0
    return IDN2_MALLOC;
169
0
        return IDN2_ENCODING_ERROR;
170
0
      }
171
0
  }
172
0
      else
173
0
  {
174
0
    if (srclen > IDN2_LABEL_MAX_LENGTH)
175
0
      return IDN2_TOO_BIG_LABEL;
176
0
    if (srclen > *dstlen)
177
0
      return IDN2_TOO_BIG_DOMAIN;
178
179
0
    memcpy (dst, src, srclen);
180
0
    *dstlen = srclen;
181
0
    return IDN2_OK;
182
0
  }
183
0
    }
184
185
0
  rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
186
0
  if (rc != IDN2_OK)
187
0
    goto out;
188
189
0
  if (!(flags & IDN2_TRANSITIONAL))
190
0
    {
191
0
      rc = _idn2_label_test (TEST_NFC |
192
0
           TEST_2HYPHEN |
193
0
           TEST_LEADING_COMBINING |
194
0
           TEST_DISALLOWED |
195
0
           TEST_CONTEXTJ_RULE |
196
0
           TEST_CONTEXTO_WITH_RULE |
197
0
           TEST_UNASSIGNED | TEST_BIDI |
198
0
           ((flags & IDN2_NONTRANSITIONAL) ?
199
0
            TEST_NONTRANSITIONAL : 0) | ((flags &
200
0
                  IDN2_USE_STD3_ASCII_RULES)
201
0
                 ? 0 :
202
0
                 TEST_ALLOW_STD3_DISALLOWED),
203
0
           p, plen);
204
205
0
      if (rc != IDN2_OK)
206
0
  goto out;
207
0
    }
208
209
0
  dst[0] = 'x';
210
0
  dst[1] = 'n';
211
0
  dst[2] = '-';
212
0
  dst[3] = '-';
213
214
0
  tmpl = *dstlen - 4;
215
0
  rc = _idn2_punycode_encode_internal (plen, p, &tmpl, (char *) dst + 4);
216
0
  if (rc != IDN2_OK)
217
0
    goto out;
218
219
220
0
  *dstlen = 4 + tmpl;
221
222
0
  if (check_roundtrip)
223
0
    {
224
0
      if (srclen_org != *dstlen
225
0
    || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
226
0
  {
227
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
228
0
    goto out;
229
0
  }
230
0
    }
231
0
  else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
232
0
    {
233
0
      rc =
234
0
  _idn2_punycode_decode_internal (*dstlen - 4, (char *) dst + 4,
235
0
          &label32_len, label_u32);
236
0
      if (rc)
237
0
  {
238
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
239
0
    goto out;
240
0
  }
241
242
0
      if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
243
0
  {
244
0
    rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
245
0
    goto out;
246
0
  }
247
0
    }
248
249
0
  rc = IDN2_OK;
250
251
0
out:
252
0
  free (p);
253
0
  free (src_allocated);
254
0
  return rc;
255
0
}
256
257
#define TR46_TRANSITIONAL_CHECK \
258
0
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
259
#define TR46_NONTRANSITIONAL_CHECK \
260
0
  (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
261
262
static int
263
_tr46 (const uint8_t * domain_u8, uint8_t ** out, int flags)
264
0
{
265
0
  size_t len, it;
266
0
  uint32_t *domain_u32;
267
0
  int err = IDN2_OK, rc;
268
0
  int transitional = 0;
269
0
  int test_flags;
270
271
0
  if (flags & IDN2_TRANSITIONAL)
272
0
    transitional = 1;
273
274
  /* convert UTF-8 to UTF-32 */
275
0
  if (!(domain_u32 =
276
0
  u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
277
0
    {
278
0
      if (errno == ENOMEM)
279
0
  return IDN2_MALLOC;
280
0
      return IDN2_ENCODING_ERROR;
281
0
    }
282
283
0
  size_t len2 = 0;
284
0
  for (it = 0; it < len - 1; it++)
285
0
    {
286
0
      IDNAMap map;
287
288
0
      get_idna_map (domain_u32[it], &map);
289
290
0
      if (map_is (&map, TR46_FLG_DISALLOWED))
291
0
  {
292
0
    if (domain_u32[it])
293
0
      {
294
0
        free (domain_u32);
295
0
        return IDN2_DISALLOWED;
296
0
      }
297
0
    len2++;
298
0
  }
299
0
      else if (map_is (&map, TR46_FLG_MAPPED))
300
0
  {
301
0
    len2 += map.nmappings;
302
0
  }
303
0
      else if (map_is (&map, TR46_FLG_VALID))
304
0
  {
305
0
    len2++;
306
0
  }
307
0
      else if (map_is (&map, TR46_FLG_IGNORED))
308
0
  {
309
0
    continue;
310
0
  }
311
0
      else if (map_is (&map, TR46_FLG_DEVIATION))
312
0
  {
313
0
    if (transitional)
314
0
      {
315
0
        len2 += map.nmappings;
316
0
      }
317
0
    else
318
0
      len2++;
319
0
  }
320
0
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
321
0
  {
322
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
323
0
      {
324
        /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
325
0
        len2++;
326
0
      }
327
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
328
0
      {
329
        /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
330
0
        len2 += map.nmappings;
331
0
      }
332
0
  }
333
0
    }
334
335
  /* Exit early if result is too long.
336
   * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
337
0
  if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
338
0
    {
339
0
      free (domain_u32);
340
0
      return IDN2_TOO_BIG_DOMAIN;
341
0
    }
342
343
0
  uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
344
0
  if (!tmp)
345
0
    {
346
0
      free (domain_u32);
347
0
      return IDN2_MALLOC;
348
0
    }
349
350
0
  len2 = 0;
351
0
  for (it = 0; it < len - 1; it++)
352
0
    {
353
0
      uint32_t c = domain_u32[it];
354
0
      IDNAMap map;
355
356
0
      get_idna_map (c, &map);
357
358
0
      if (map_is (&map, TR46_FLG_DISALLOWED))
359
0
  {
360
0
    tmp[len2++] = c;
361
0
  }
362
0
      else if (map_is (&map, TR46_FLG_MAPPED))
363
0
  {
364
0
    len2 += get_map_data (tmp + len2, &map);
365
0
  }
366
0
      else if (map_is (&map, TR46_FLG_VALID))
367
0
  {
368
0
    tmp[len2++] = c;
369
0
  }
370
0
      else if (map_is (&map, TR46_FLG_IGNORED))
371
0
  {
372
0
    continue;
373
0
  }
374
0
      else if (map_is (&map, TR46_FLG_DEVIATION))
375
0
  {
376
0
    if (transitional)
377
0
      {
378
0
        len2 += get_map_data (tmp + len2, &map);
379
0
      }
380
0
    else
381
0
      tmp[len2++] = c;
382
0
  }
383
0
      else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
384
0
  {
385
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
386
0
      {
387
0
        tmp[len2++] = c;
388
0
      }
389
0
    else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
390
0
      {
391
0
        len2 += get_map_data (tmp + len2, &map);
392
0
      }
393
0
  }
394
0
    }
395
0
  free (domain_u32);
396
397
  /* Normalize to NFC */
398
0
  tmp[len2] = 0;
399
0
  domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
400
0
  free (tmp);
401
0
  tmp = NULL;
402
403
0
  if (!domain_u32)
404
0
    {
405
0
      if (errno == ENOMEM)
406
0
  return IDN2_MALLOC;
407
0
      return IDN2_ENCODING_ERROR;
408
0
    }
409
410
  /* split into labels and check */
411
0
  uint32_t *e, *s;
412
0
  for (e = s = domain_u32; *e; s = e)
413
0
    {
414
0
      while (*e && *e != '.')
415
0
  e++;
416
417
0
      if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
418
0
    && s[3] == '-')
419
0
  {
420
    /* decode punycode and check result non-transitional */
421
0
    size_t ace_len;
422
0
    uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
423
0
    size_t name_len = IDN2_LABEL_MAX_LENGTH;
424
0
    uint8_t *ace;
425
426
0
    ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
427
0
    if (!ace)
428
0
      {
429
0
        free (domain_u32);
430
0
        if (errno == ENOMEM)
431
0
    return IDN2_MALLOC;
432
0
        return IDN2_ENCODING_ERROR;
433
0
      }
434
435
0
    rc = _idn2_punycode_decode_internal (ace_len, (char *) ace,
436
0
                 &name_len, name_u32);
437
438
0
    free (ace);
439
440
0
    if (rc)
441
0
      {
442
0
        free (domain_u32);
443
0
        return rc;
444
0
      }
445
446
0
    test_flags = TR46_NONTRANSITIONAL_CHECK;
447
448
0
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
449
0
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
450
451
0
    if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
452
0
      err = rc;
453
0
  }
454
0
      else
455
0
  {
456
0
    test_flags =
457
0
      transitional ? TR46_TRANSITIONAL_CHECK :
458
0
      TR46_NONTRANSITIONAL_CHECK;
459
460
0
    if (!(flags & IDN2_USE_STD3_ASCII_RULES))
461
0
      test_flags |= TEST_ALLOW_STD3_DISALLOWED;
462
463
0
    if ((rc = _idn2_label_test (test_flags, s, e - s)))
464
0
      err = rc;
465
0
  }
466
467
0
      if (*e)
468
0
  e++;
469
0
    }
470
471
0
  if (err == IDN2_OK && out)
472
0
    {
473
0
      uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
474
0
      free (domain_u32);
475
476
0
      if (!_out)
477
0
  {
478
0
    if (errno == ENOMEM)
479
0
      return IDN2_MALLOC;
480
0
    return IDN2_ENCODING_ERROR;
481
0
  }
482
483
0
      *out = _out;
484
0
    }
485
0
  else
486
0
    free (domain_u32);
487
488
0
  return err;
489
0
}
490
491
/**
492
 * idn2_lookup_u8:
493
 * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
494
 * @lookupname: newly allocated output variable with name to lookup in DNS.
495
 * @flags: optional #idn2_flags to modify behaviour.
496
 *
497
 * Perform IDNA2008 lookup string conversion on domain name @src, as
498
 * described in section 5 of RFC 5891.  Note that the input string
499
 * must be encoded in UTF-8 and be in Unicode NFC form.
500
 *
501
 * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
502
 * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
503
 * do already imply %IDN2_NFC_INPUT.
504
 *
505
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
506
 * convert any input A-labels to U-labels and perform additional
507
 * testing. This is default since version 2.2.
508
 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
509
 *
510
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
511
 * transitional processing, and %IDN2_NONTRANSITIONAL to enable
512
 * Unicode TR46 non-transitional processing.
513
 *
514
 * Multiple flags may be specified by binary or:ing them together.
515
 *
516
 * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
517
 * Previously we were eliminating non-STD3 characters from domain strings
518
 * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
519
 * functions. That was an unexpected regression for applications switching
520
 * from libidn and thus it is no longer applied by default.
521
 * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
522
 *
523
 * After version 0.11: @lookupname may be NULL to test lookup of @src
524
 * without allocating memory.
525
 *
526
 * Returns: On successful conversion %IDN2_OK is returned, if the
527
 *   output domain or any label would have been too long
528
 *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
529
 *   another error code is returned.
530
 *
531
 * Since: 0.1
532
 **/
533
int
534
idn2_lookup_u8 (const uint8_t * src, uint8_t ** lookupname, int flags)
535
0
{
536
0
  size_t lookupnamelen = 0;
537
0
  uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
538
0
  uint8_t *src_allocated = NULL;
539
0
  int rc;
540
541
0
  if (src == NULL)
542
0
    {
543
0
      if (lookupname)
544
0
  *lookupname = NULL;
545
0
      return IDN2_OK;
546
0
    }
547
548
0
  rc = set_default_flags (&flags);
549
0
  if (rc != IDN2_OK)
550
0
    return rc;
551
552
0
  if (!(flags & IDN2_NO_TR46))
553
0
    {
554
0
      uint8_t *out;
555
556
0
      rc = _tr46 (src, &out, flags);
557
0
      if (rc != IDN2_OK)
558
0
  return rc;
559
560
0
      src = src_allocated = out;
561
0
    }
562
563
0
  do
564
0
    {
565
0
      const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
566
      /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
567
         and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
568
0
      size_t labellen = end - src;
569
0
      uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
570
0
      size_t tmplen = IDN2_LABEL_MAX_LENGTH;
571
572
0
      rc = label (src, labellen, tmp, &tmplen, flags);
573
0
      if (rc != IDN2_OK)
574
0
  {
575
0
    free (src_allocated);
576
0
    return rc;
577
0
  }
578
579
0
      if (lookupnamelen + tmplen
580
0
    > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
581
0
  {
582
0
    free (src_allocated);
583
0
    return IDN2_TOO_BIG_DOMAIN;
584
0
  }
585
586
0
      memcpy (_lookupname + lookupnamelen, tmp, tmplen);
587
0
      lookupnamelen += tmplen;
588
589
0
      if (*end == '.')
590
0
  {
591
0
    if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
592
0
      {
593
0
        free (src_allocated);
594
0
        return IDN2_TOO_BIG_DOMAIN;
595
0
      }
596
597
0
    _lookupname[lookupnamelen] = '.';
598
0
    lookupnamelen++;
599
0
  }
600
0
      _lookupname[lookupnamelen] = '\0';
601
602
0
      src = end;
603
0
    }
604
0
  while (*src++);
605
606
0
  free (src_allocated);
607
608
0
  if (lookupname)
609
0
    {
610
0
      uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
611
612
0
      if (tmp == NULL)
613
0
  return IDN2_MALLOC;
614
615
0
      memcpy (tmp, _lookupname, lookupnamelen + 1);
616
0
      *lookupname = tmp;
617
0
    }
618
619
0
  return IDN2_OK;
620
0
}
621
622
/**
623
 * idn2_lookup_ul:
624
 * @src: input zero-terminated locale encoded string.
625
 * @lookupname: newly allocated output variable with name to lookup in DNS.
626
 * @flags: optional #idn2_flags to modify behaviour.
627
 *
628
 * Perform IDNA2008 lookup string conversion on domain name @src, as
629
 * described in section 5 of RFC 5891.  Note that the input is assumed
630
 * to be encoded in the locale's default coding system, and will be
631
 * transcoded to UTF-8 and NFC normalized by this function.
632
 *
633
 * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
634
 * convert any input A-labels to U-labels and perform additional
635
 * testing. This is default since version 2.2.
636
 * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
637
 *
638
 * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
639
 * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
640
 * processing.
641
 *
642
 * Multiple flags may be specified by binary or:ing them together, for
643
 * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
644
 *
645
 * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
646
 *
647
 * After version 0.11: @lookupname may be NULL to test lookup of @src
648
 * without allocating memory.
649
 *
650
 * Returns: On successful conversion %IDN2_OK is returned, if
651
 *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL is
652
 *   returned, if the output domain or any label would have been too
653
 *   long %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
654
 *   another error code is returned.
655
 *
656
 * Since: 0.1
657
 **/
658
int
659
idn2_lookup_ul (const char *src, char **lookupname, int flags)
660
0
{
661
0
  uint8_t *utf8src = NULL;
662
0
  int rc;
663
664
0
  if (src)
665
0
    {
666
0
      const char *encoding = locale_charset ();
667
668
0
      utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
669
670
0
      if (!utf8src)
671
0
  {
672
0
    if (errno == ENOMEM)
673
0
      return IDN2_MALLOC;
674
0
    return IDN2_ICONV_FAIL;
675
0
  }
676
0
    }
677
678
0
  rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
679
0
           flags | IDN2_NFC_INPUT);
680
681
0
  free (utf8src);
682
683
0
  return rc;
684
0
}
685
686
/**
687
 * idn2_to_ascii_4i:
688
 * @input: zero terminated input Unicode (UCS-4) string.
689
 * @inlen: number of elements in @input.
690
 * @output: output zero terminated string that must have room for at least 63 characters plus the terminating zero.
691
 * @flags: optional #idn2_flags to modify behaviour.
692
 *
693
 * THIS FUNCTION HAS BEEN DEPRECATED DUE TO A DESIGN FLAW. USE idn2_to_ascii_4i2() INSTEAD !
694
 *
695
 * The ToASCII operation takes a sequence of Unicode code points that make
696
 * up one domain label and transforms it into a sequence of code points in
697
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
698
 * the resulting sequence are equivalent labels.
699
 *
700
 * It is important to note that the ToASCII operation can fail.
701
 * ToASCII fails if any step of it fails. If any step of the
702
 * ToASCII operation fails on any label in a domain name, that domain
703
 * name MUST NOT be used as an internationalized domain name.
704
 * The method for dealing with this failure is application-specific.
705
 *
706
 * The inputs to ToASCII are a sequence of code points.
707
 *
708
 * ToASCII never alters a sequence of code points that are all in the ASCII
709
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
710
 * effect as applying it just once.
711
 *
712
 * The default behavior of this function (when flags are zero) is to apply
713
 * the IDNA2008 rules without the TR46 amendments. As the TR46
714
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
715
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
716
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
717
 *
718
 * Return value: Returns %IDN2_OK on success, or error code.
719
 *
720
 * Since: 2.0.0
721
 *
722
 * Deprecated: 2.1.1: Use idn2_to_ascii_4i2().
723
 **/
724
int
725
idn2_to_ascii_4i (const uint32_t * input, size_t inlen, char *output,
726
      int flags)
727
0
{
728
0
  char *out;
729
0
  int rc;
730
731
0
  if (!input)
732
0
    {
733
0
      if (output)
734
0
  *output = 0;
735
0
      return IDN2_OK;
736
0
    }
737
738
0
  rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
739
0
  if (rc == IDN2_OK)
740
0
    {
741
0
      size_t len = strlen (out);
742
743
0
      if (len > 63)
744
0
  rc = IDN2_TOO_BIG_DOMAIN;
745
0
      else if (output)
746
0
  memcpy (output, out, len);
747
748
0
      free (out);
749
0
    }
750
751
0
  return rc;
752
0
}
753
754
/**
755
 * idn2_to_ascii_4i2:
756
 * @input: zero terminated input Unicode (UCS-4) string.
757
 * @inlen: number of elements in @input.
758
 * @output: pointer to newly allocated zero-terminated output string.
759
 * @flags: optional #idn2_flags to modify behaviour.
760
 *
761
 * The ToASCII operation takes a sequence of Unicode code points that make
762
 * up one domain label and transforms it into a sequence of code points in
763
 * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
764
 * the resulting sequence are equivalent labels.
765
 *
766
 * It is important to note that the ToASCII operation can fail.
767
 * ToASCII fails if any step of it fails. If any step of the
768
 * ToASCII operation fails on any label in a domain name, that domain
769
 * name MUST NOT be used as an internationalized domain name.
770
 * The method for dealing with this failure is application-specific.
771
 *
772
 * The inputs to ToASCII are a sequence of code points.
773
 *
774
 * ToASCII never alters a sequence of code points that are all in the ASCII
775
 * range to begin with (although it could fail). Applying the ToASCII operation multiple
776
 * effect as applying it just once.
777
 *
778
 * The default behavior of this function (when flags are zero) is to apply
779
 * the IDNA2008 rules without the TR46 amendments. As the TR46
780
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
781
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
782
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
783
 *
784
 * Return value: Returns %IDN2_OK on success, or error code.
785
 *
786
 * Since: 2.1.1
787
 **/
788
int
789
idn2_to_ascii_4i2 (const uint32_t * input, size_t inlen, char **output,
790
       int flags)
791
0
{
792
0
  uint32_t *input_u32;
793
0
  uint8_t *input_u8, *output_u8;
794
0
  size_t length;
795
0
  int rc;
796
797
0
  if (!input)
798
0
    {
799
0
      if (output)
800
0
  *output = NULL;
801
0
      return IDN2_OK;
802
0
    }
803
804
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
805
0
  if (!input_u32)
806
0
    return IDN2_MALLOC;
807
808
0
  u32_cpy (input_u32, input, inlen);
809
0
  input_u32[inlen] = 0;
810
811
0
  input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length);
812
0
  free (input_u32);
813
0
  if (!input_u8)
814
0
    {
815
0
      if (errno == ENOMEM)
816
0
  return IDN2_MALLOC;
817
0
      return IDN2_ENCODING_ERROR;
818
0
    }
819
820
0
  rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
821
0
  free (input_u8);
822
823
0
  if (rc == IDN2_OK)
824
0
    {
825
0
      if (output)
826
0
  *output = (char *) output_u8;
827
0
      else
828
0
  free (output_u8);
829
0
    }
830
831
0
  return rc;
832
0
}
833
834
/**
835
 * idn2_to_ascii_4z:
836
 * @input: zero terminated input Unicode (UCS-4) string.
837
 * @output: pointer to newly allocated zero-terminated output string.
838
 * @flags: optional #idn2_flags to modify behaviour.
839
 *
840
 * Convert UCS-4 domain name to ASCII string using the IDNA2008
841
 * rules.  The domain name may contain several labels, separated by dots.
842
 * The output buffer must be deallocated by the caller.
843
 *
844
 * The default behavior of this function (when flags are zero) is to apply
845
 * the IDNA2008 rules without the TR46 amendments. As the TR46
846
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
847
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
848
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
849
 *
850
 * Return value: Returns %IDN2_OK on success, or error code.
851
 *
852
 * Since: 2.0.0
853
 **/
854
int
855
idn2_to_ascii_4z (const uint32_t * input, char **output, int flags)
856
0
{
857
0
  uint8_t *input_u8;
858
0
  size_t length;
859
0
  int rc;
860
861
0
  if (!input)
862
0
    {
863
0
      if (output)
864
0
  *output = NULL;
865
0
      return IDN2_OK;
866
0
    }
867
868
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
869
0
  if (!input_u8)
870
0
    {
871
0
      if (errno == ENOMEM)
872
0
  return IDN2_MALLOC;
873
0
      return IDN2_ENCODING_ERROR;
874
0
    }
875
876
0
  rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
877
0
  free (input_u8);
878
879
0
  return rc;
880
0
}
881
882
/**
 * idn2_to_ascii_8z:
 * @input: zero terminated input UTF-8 string.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert UTF-8 domain name to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * The default behavior of this function (when flags are zero) is to apply
 * the IDNA2008 rules without the TR46 amendments. As the TR46
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
 *
 * Return value: Returns %IDN2_OK on success, or error code.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_8z (const char *input, char **output, int flags)
{
  /* Same operation as idn2_lookup_u8(), with plain char pointer types.  */
  return idn2_lookup_u8 ((const uint8_t *) input, (uint8_t **) output, flags);
}
907
908
/**
 * idn2_to_ascii_lz:
 * @input: zero terminated input string, encoded in the locale's charset.
 * @output: pointer to newly allocated output string.
 * @flags: optional #idn2_flags to modify behaviour.
 *
 * Convert a domain name in locale's encoding to ASCII string using the IDNA2008
 * rules.  The domain name may contain several labels, separated by dots.
 * The output buffer must be deallocated by the caller.
 *
 * The default behavior of this function (when flags are zero) is to apply
 * the IDNA2008 rules without the TR46 amendments. As the TR46
 * non-transitional processing is nowadays ubiquitous, when unsure, it is
 * recommended to call this function with the %IDN2_NONTRANSITIONAL
 * and the %IDN2_NFC_INPUT flags for compatibility with other software.
 *
 * Returns: %IDN2_OK on success, or error code.
 * Same as described in idn2_lookup_ul() documentation.
 *
 * Since: 2.0.0
 **/
int
idn2_to_ascii_lz (const char *input, char **output, int flags)
{
  /* Plain alias for idn2_lookup_ul().  */
  return idn2_lookup_ul (input, output, flags);
}