Coverage Report

Created: 2025-10-10 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn/lib/idna.c
Line
Count
Source
1
/* idna.c --- Implementation of the Internationalized Domain Name library.
2
   Copyright (C) 2002-2025 Simon Josefsson
3
4
   This file is part of GNU Libidn.
5
6
   GNU Libidn is free software: you can redistribute it and/or
7
   modify it under the terms of either:
8
9
     * the GNU Lesser General Public License as published by the Free
10
       Software Foundation; either version 3 of the License, or (at
11
       your option) any later version.
12
13
   or
14
15
     * the GNU General Public License as published by the Free
16
       Software Foundation; either version 2 of the License, or (at
17
       your option) any later version.
18
19
   or both in parallel, as here.
20
21
   GNU Libidn is distributed in the hope that it will be useful,
22
   but WITHOUT ANY WARRANTY; without even the implied warranty of
23
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
   General Public License for more details.
25
26
   You should have received copies of the GNU General Public License and
27
   the GNU Lesser General Public License along with this program.  If
28
   not, see <https://www.gnu.org/licenses/>. */
29
30
#ifdef HAVE_CONFIG_H
31
# include "config.h"
32
#endif
33
34
#include <stdlib.h>
35
#include <string.h>
36
#include <stringprep.h>
37
#include <punycode.h>
38
39
#include "idna.h"
40
41
/* Get c_strcasecmp. */
42
#include <c-strcase.h>
43
44
488k
/* True when code point C is one of the four label separators:
   U+002E, U+3002, U+FF0E or U+FF61.  C is evaluated more than once. */
#define DOTP(c)                 \
  ((c) == 0x002E                \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
46
47
/* Core functions */
48
49
/**
50
 * idna_to_ascii_4i:
51
 * @in: input array with unicode code points.
52
 * @inlen: length of input array with unicode code points.
53
 * @out: output zero terminated string that must have room for at
54
 *       least 63 characters plus the terminating zero.
55
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
56
 *   %IDNA_USE_STD3_ASCII_RULES.
57
 *
58
 * The ToASCII operation takes a sequence of Unicode code points that
59
 * make up one domain label and transforms it into a sequence of code
60
 * points in the ASCII range (0..7F). If ToASCII succeeds, the
61
 * original sequence and the resulting sequence are equivalent labels.
62
 *
63
 * It is important to note that the ToASCII operation can fail. ToASCII
64
 * fails if any step of it fails. If any step of the ToASCII operation
65
 * fails on any label in a domain name, that domain name MUST NOT be used
66
 * as an internationalized domain name. The method for dealing with this
67
 * failure is application-specific.
68
 *
69
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
70
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
71
 * sequence of ASCII code points or a failure condition.
72
 *
73
 * ToASCII never alters a sequence of code points that are all in the ASCII
74
 * range to begin with (although it could fail). Applying the ToASCII
75
 * operation multiple times has exactly the same effect as applying it just
76
 * once.
77
 *
78
 * Return value: Returns 0 on success, or an #Idna_rc error code.
79
 */
80
int
81
idna_to_ascii_4i (const uint32_t *in, size_t inlen, char *out, int flags)
82
20.7k
{
83
20.7k
  size_t len, outlen;
84
20.7k
  uint32_t *src;    /* XXX don't need to copy data? */
85
20.7k
  int rc;
86
87
  /*
88
   * ToASCII consists of the following steps:
89
   *
90
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
91
   * then skip to step 3.
92
   */
93
94
20.7k
  {
95
20.7k
    size_t i;
96
20.7k
    int inasciirange;
97
98
20.7k
    inasciirange = 1;
99
319k
    for (i = 0; i < inlen; i++)
100
298k
      if (in[i] > 0x7F)
101
232k
  inasciirange = 0;
102
20.7k
    if (inasciirange)
103
7.14k
      {
104
7.14k
  src = malloc (sizeof (in[0]) * (inlen + 1));
105
7.14k
  if (src == NULL)
106
0
    return IDNA_MALLOC_ERROR;
107
108
7.14k
  memcpy (src, in, sizeof (in[0]) * inlen);
109
7.14k
  src[inlen] = 0;
110
111
7.14k
  goto step3;
112
7.14k
      }
113
20.7k
  }
114
115
  /*
116
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
117
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
118
   */
119
120
13.6k
  {
121
13.6k
    char *p;
122
123
13.6k
    p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
124
13.6k
    if (p == NULL)
125
1.24k
      return IDNA_MALLOC_ERROR;
126
127
12.3k
    len = strlen (p);
128
12.3k
    do
129
17.2k
      {
130
17.2k
  char *newp;
131
132
17.2k
  len = 2 * len + 10; /* XXX better guess? */
133
17.2k
  newp = realloc (p, len);
134
17.2k
  if (newp == NULL)
135
0
    {
136
0
      free (p);
137
0
      return IDNA_MALLOC_ERROR;
138
0
    }
139
17.2k
  p = newp;
140
141
17.2k
  if (flags & IDNA_ALLOW_UNASSIGNED)
142
8.72k
    rc = stringprep_nameprep (p, len);
143
8.48k
  else
144
8.48k
    rc = stringprep_nameprep_no_unassigned (p, len);
145
17.2k
      }
146
17.2k
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);
147
148
12.3k
    if (rc != STRINGPREP_OK)
149
3.26k
      {
150
3.26k
  free (p);
151
3.26k
  return IDNA_STRINGPREP_ERROR;
152
3.26k
      }
153
154
9.10k
    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
155
156
9.10k
    free (p);
157
158
9.10k
    if (!src)
159
0
      return IDNA_MALLOC_ERROR;
160
9.10k
  }
161
162
16.2k
step3:
163
  /*
164
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
165
   *
166
   * (a) Verify the absence of non-LDH ASCII code points; that is,
167
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
168
   *
169
   * (b) Verify the absence of leading and trailing hyphen-minus;
170
   * that is, the absence of U+002D at the beginning and end of
171
   * the sequence.
172
   */
173
174
16.2k
  if (flags & IDNA_USE_STD3_ASCII_RULES)
175
8.56k
    {
176
8.56k
      size_t i;
177
178
86.4k
      for (i = 0; src[i]; i++)
179
80.5k
  if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
180
78.9k
      (src[i] >= 0x3A && src[i] <= 0x40) ||
181
78.6k
      (src[i] >= 0x5B && src[i] <= 0x60) ||
182
78.3k
      (src[i] >= 0x7B && src[i] <= 0x7F))
183
2.65k
    {
184
2.65k
      free (src);
185
2.65k
      return IDNA_CONTAINS_NON_LDH;
186
2.65k
    }
187
188
5.90k
      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
189
463
  {
190
463
    free (src);
191
463
    return IDNA_CONTAINS_MINUS;
192
463
  }
193
5.90k
    }
194
195
  /*
196
   * 4. If all code points in the sequence are in the ASCII range
197
   * (0..7F), then skip to step 8.
198
   */
199
200
13.1k
  {
201
13.1k
    size_t i;
202
13.1k
    int inasciirange;
203
204
13.1k
    inasciirange = 1;
205
848k
    for (i = 0; src[i]; i++)
206
835k
      {
207
835k
  if (src[i] > 0x7F)
208
677k
    inasciirange = 0;
209
  /* copy string to output buffer if we are about to skip to step8 */
210
835k
  if (i < 64)
211
124k
    out[i] = src[i];
212
835k
      }
213
13.1k
    if (i < 64)
214
12.4k
      out[i] = '\0';
215
643
    else
216
643
      {
217
643
  free (src);
218
643
  return IDNA_INVALID_LENGTH;
219
643
      }
220
12.4k
    if (inasciirange)
221
5.92k
      goto step8;
222
12.4k
  }
223
224
  /*
225
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
226
   *
227
   */
228
229
6.56k
  {
230
6.56k
    size_t i;
231
6.56k
    int match;
232
233
6.56k
    match = 1;
234
14.2k
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
235
7.73k
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
236
6.29k
  match = 0;
237
6.56k
    if (match)
238
268
      {
239
268
  free (src);
240
268
  return IDNA_CONTAINS_ACE_PREFIX;
241
268
      }
242
6.56k
  }
243
244
  /*
245
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
246
   * and fail if there is an error.
247
   */
248
80.0k
  for (len = 0; src[len]; len++)
249
73.7k
    ;
250
6.29k
  src[len] = '\0';
251
6.29k
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
252
6.29k
  rc = punycode_encode (len, src, NULL,
253
6.29k
      &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
254
6.29k
  if (rc != PUNYCODE_SUCCESS)
255
740
    {
256
740
      free (src);
257
740
      return IDNA_PUNYCODE_ERROR;
258
740
    }
259
5.55k
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
260
261
  /*
262
   * 7. Prepend the ACE prefix.
263
   */
264
265
5.55k
  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
266
267
  /*
268
   * 8. Verify that the number of code points is in the range 1 to 63
269
   * inclusive (0 is excluded).
270
   */
271
272
11.4k
step8:
273
11.4k
  free (src);
274
11.4k
  if (strlen (out) < 1)
275
1.21k
    return IDNA_INVALID_LENGTH;
276
277
10.2k
  return IDNA_SUCCESS;
278
11.4k
}
279
280
/* ToUnicode().  May realloc() utf8in.  Will free utf8in unconditionally. */
281
static int
282
idna_to_unicode_internal (char *utf8in,
283
        uint32_t *out, size_t *outlen, int flags)
284
29.8k
{
285
29.8k
  int rc;
286
29.8k
  char tmpout[64];
287
29.8k
  size_t utf8len = strlen (utf8in) + 1;
288
29.8k
  size_t addlen = 0, addinc = utf8len / 10 + 1;
289
290
  /*
291
   * ToUnicode consists of the following steps:
292
   *
293
   * 1. If the sequence contains any code points outside the ASCII range
294
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
295
   */
296
297
29.8k
  {
298
29.8k
    size_t i;
299
29.8k
    int inasciirange;
300
301
29.8k
    inasciirange = 1;
302
572k
    for (i = 0; utf8in[i]; i++)
303
542k
      if (utf8in[i] & ~0x7F)
304
175k
  inasciirange = 0;
305
29.8k
    if (inasciirange)
306
17.4k
      goto step3;
307
29.8k
  }
308
309
  /*
310
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
311
   * error. (If step 3 of ToASCII is also performed here, it will not
312
   * affect the overall behavior of ToUnicode, but it is not
313
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
314
   */
315
12.4k
  do
316
24.5k
    {
317
24.5k
      char *newp = realloc (utf8in, utf8len + addlen);
318
24.5k
      if (newp == NULL)
319
0
  {
320
0
    free (utf8in);
321
0
    return IDNA_MALLOC_ERROR;
322
0
  }
323
24.5k
      utf8in = newp;
324
24.5k
      if (flags & IDNA_ALLOW_UNASSIGNED)
325
12.9k
  rc = stringprep_nameprep (utf8in, utf8len + addlen);
326
11.5k
      else
327
11.5k
  rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
328
24.5k
      addlen += addinc;
329
24.5k
      addinc *= 2;
330
24.5k
    }
331
24.5k
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
332
333
12.4k
  if (rc != STRINGPREP_OK)
334
5.16k
    {
335
5.16k
      free (utf8in);
336
5.16k
      return IDNA_STRINGPREP_ERROR;
337
5.16k
    }
338
339
  /* 3. Verify that the sequence begins with the ACE prefix, and save a
340
   * copy of the sequence.
341
   * ... The ToASCII and ToUnicode operations MUST recognize the ACE
342
   prefix in a case-insensitive manner.
343
   */
344
345
24.7k
step3:
346
24.7k
  if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0)
347
10.6k
    {
348
10.6k
      free (utf8in);
349
10.6k
      return IDNA_NO_ACE_PREFIX;
350
10.6k
    }
351
352
  /* 4. Remove the ACE prefix.
353
   */
354
355
14.1k
  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
356
14.1k
     strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
357
358
  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
359
   * and fail if there is an error. Save a copy of the result of
360
   * this step.
361
   */
362
363
14.1k
  (*outlen)--;      /* reserve one for the zero */
364
365
14.1k
  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
366
14.1k
  if (rc != PUNYCODE_SUCCESS)
367
3.53k
    {
368
3.53k
      free (utf8in);
369
3.53k
      return IDNA_PUNYCODE_ERROR;
370
3.53k
    }
371
372
10.5k
  out[*outlen] = 0;   /* add zero */
373
374
  /* 6. Apply ToASCII.
375
   */
376
377
10.5k
  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
378
10.5k
  if (rc != IDNA_SUCCESS)
379
6.32k
    {
380
6.32k
      free (utf8in);
381
6.32k
      return rc;
382
6.32k
    }
383
384
  /* 7. Verify that the result of step 6 matches the saved copy from
385
   * step 3, using a case-insensitive ASCII comparison.
386
   */
387
388
4.24k
  if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
389
3.65k
    {
390
3.65k
      free (utf8in);
391
3.65k
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
392
3.65k
    }
393
394
  /* 8. Return the saved copy from step 5.
395
   */
396
397
4.24k
  free (utf8in);
398
594
  return IDNA_SUCCESS;
399
4.24k
}
400
401
/**
402
 * idna_to_unicode_44i:
403
 * @in: input array with unicode code points.
404
 * @inlen: length of input array with unicode code points.
405
 * @out: output array with unicode code points.
406
 * @outlen: on input, maximum size of output array with unicode code points,
407
 *          on exit, actual size of output array with unicode code points.
408
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
409
 *   %IDNA_USE_STD3_ASCII_RULES.
410
 *
411
 * The ToUnicode operation takes a sequence of Unicode code points
412
 * that make up one domain label and returns a sequence of Unicode
413
 * code points. If the input sequence is a label in ACE form, then the
414
 * result is an equivalent internationalized label that is not in ACE
415
 * form, otherwise the original sequence is returned unaltered.
416
 *
417
 * ToUnicode never fails. If any step fails, then the original input
418
 * sequence is returned immediately in that step.
419
 *
420
 * The Punycode decoder can never output more code points than it
421
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
422
 * the number of octets needed to represent a sequence of code points
423
 * depends on the particular character encoding used.
424
 *
425
 * The inputs to ToUnicode are a sequence of code points, the
426
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
427
 * ToUnicode is always a sequence of Unicode code points.
428
 *
429
 * Return value: Returns #Idna_rc error condition, but it must only be
430
 *   used for debugging purposes.  The output buffer is always
431
 *   guaranteed to contain the correct data according to the
432
 *   specification (sans malloc induced errors).  NB!  This means that
433
 *   you normally ignore the return code from this function, as
434
 *   checking it means breaking the standard.
435
 */
436
int
437
idna_to_unicode_44i (const uint32_t *in, size_t inlen,
438
         uint32_t *out, size_t *outlen, int flags)
439
31.6k
{
440
31.6k
  int rc;
441
31.6k
  size_t outlensave = *outlen;
442
31.6k
  char *p;
443
444
31.6k
  p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
445
31.6k
  if (p == NULL)
446
1.79k
    return IDNA_MALLOC_ERROR;
447
448
29.8k
  rc = idna_to_unicode_internal (p, out, outlen, flags);
449
29.8k
  if (rc != IDNA_SUCCESS)
450
29.2k
    {
451
29.2k
      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
452
29.2k
           inlen : outlensave));
453
29.2k
      *outlen = inlen;
454
29.2k
    }
455
456
  /* p is freed in idna_to_unicode_internal.  */
457
458
29.8k
  return rc;
459
31.6k
}
460
461
/* Wrappers that handle several labels */
462
463
/**
464
 * idna_to_ascii_4z:
465
 * @input: zero terminated input Unicode string.
466
 * @output: pointer to newly allocated output string.
467
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
468
 *   %IDNA_USE_STD3_ASCII_RULES.
469
 *
470
 * Convert UCS-4 domain name to ASCII string.  The domain name may
471
 * contain several labels, separated by dots.  The output buffer must
472
 * be deallocated by the caller.
473
 *
474
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
475
 **/
476
int
477
idna_to_ascii_4z (const uint32_t *input, char **output, int flags)
478
5.08k
{
479
5.08k
  const uint32_t *start = input;
480
5.08k
  const uint32_t *end;
481
5.08k
  char buf[64];
482
5.08k
  char *out = NULL;
483
5.08k
  int rc;
484
485
  /* 1) Whenever dots are used as label separators, the following
486
     characters MUST be recognized as dots: U+002E (full stop),
487
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
488
     U+FF61 (halfwidth ideographic full stop). */
489
490
5.08k
  if (input[0] == 0)
491
126
    {
492
      /* Handle implicit zero-length root label. */
493
126
      *output = malloc (1);
494
126
      if (!*output)
495
0
  return IDNA_MALLOC_ERROR;
496
126
      strcpy (*output, "");
497
126
      return IDNA_SUCCESS;
498
126
    }
499
500
4.95k
  if (DOTP (input[0]) && input[1] == 0)
501
100
    {
502
      /* Handle explicit zero-length root label. */
503
100
      *output = malloc (2);
504
100
      if (!*output)
505
0
  return IDNA_MALLOC_ERROR;
506
100
      strcpy (*output, ".");
507
100
      return IDNA_SUCCESS;
508
100
    }
509
510
4.85k
  *output = NULL;
511
4.85k
  do
512
8.70k
    {
513
8.70k
      end = start;
514
515
65.0k
      for (; *end && !DOTP (*end); end++)
516
56.3k
  ;
517
518
8.70k
      if (*end == '\0' && start == end)
519
48
  {
520
    /* Handle explicit zero-length root label. */
521
48
    buf[0] = '\0';
522
48
  }
523
8.65k
      else
524
8.65k
  {
525
8.65k
    rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
526
8.65k
    if (rc != IDNA_SUCCESS)
527
2.89k
      {
528
2.89k
        free (out);
529
2.89k
        return rc;
530
2.89k
      }
531
8.65k
  }
532
533
5.81k
      if (out)
534
3.70k
  {
535
3.70k
    size_t l = strlen (out) + 1 + strlen (buf) + 1;
536
3.70k
    char *newp = realloc (out, l);
537
3.70k
    if (!newp)
538
0
      {
539
0
        free (out);
540
0
        return IDNA_MALLOC_ERROR;
541
0
      }
542
3.70k
    out = newp;
543
3.70k
    strcat (out, ".");
544
3.70k
    strcat (out, buf);
545
3.70k
  }
546
2.11k
      else
547
2.11k
  {
548
2.11k
    out = strdup (buf);
549
2.11k
    if (!out)
550
0
      return IDNA_MALLOC_ERROR;
551
2.11k
  }
552
553
5.81k
      start = end + 1;
554
5.81k
    }
555
5.81k
  while (*end);
556
557
1.96k
  *output = out;
558
559
1.96k
  return IDNA_SUCCESS;
560
4.85k
}
561
562
/**
563
 * idna_to_ascii_8z:
564
 * @input: zero terminated input UTF-8 string.
565
 * @output: pointer to newly allocated output string.
566
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
567
 *   %IDNA_USE_STD3_ASCII_RULES.
568
 *
569
 * Convert UTF-8 domain name to ASCII string.  The domain name may
570
 * contain several labels, separated by dots.  The output buffer must
571
 * be deallocated by the caller.
572
 *
573
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
574
 **/
575
int
576
idna_to_ascii_8z (const char *input, char **output, int flags)
577
4.22k
{
578
4.22k
  uint32_t *ucs4;
579
4.22k
  size_t ucs4len;
580
4.22k
  int rc;
581
582
4.22k
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
583
4.22k
  if (!ucs4)
584
660
    return IDNA_ICONV_ERROR;
585
586
3.56k
  rc = idna_to_ascii_4z (ucs4, output, flags);
587
588
3.56k
  free (ucs4);
589
590
3.56k
  return rc;
591
592
4.22k
}
593
594
/**
595
 * idna_to_ascii_lz:
596
 * @input: zero terminated input string encoded in the current locale's
597
 *   character set.
598
 * @output: pointer to newly allocated output string.
599
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
600
 *   %IDNA_USE_STD3_ASCII_RULES.
601
 *
602
 * Convert domain name in the locale's encoding to ASCII string.  The
603
 * domain name may contain several labels, separated by dots.  The
604
 * output buffer must be deallocated by the caller.
605
 *
606
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
607
 **/
608
int
609
idna_to_ascii_lz (const char *input, char **output, int flags)
610
3.67k
{
611
3.67k
  char *utf8;
612
3.67k
  int rc;
613
614
3.67k
  utf8 = stringprep_locale_to_utf8 (input);
615
3.67k
  if (!utf8)
616
3.12k
    return IDNA_ICONV_ERROR;
617
618
548
  rc = idna_to_ascii_8z (utf8, output, flags);
619
620
548
  free (utf8);
621
622
548
  return rc;
623
3.67k
}
624
625
/**
626
 * idna_to_unicode_4z4z:
627
 * @input: zero-terminated Unicode string.
628
 * @output: pointer to newly allocated output Unicode string.
629
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
630
 *   %IDNA_USE_STD3_ASCII_RULES.
631
 *
632
 * Convert possibly ACE encoded domain name in UCS-4 format into a
633
 * UCS-4 string.  The domain name may contain several labels,
634
 * separated by dots.  The output buffer must be deallocated by the
635
 * caller.
636
 *
637
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
638
 **/
639
int
640
idna_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
641
13.2k
{
642
13.2k
  const uint32_t *start = input;
643
13.2k
  const uint32_t *end;
644
13.2k
  uint32_t *buf;
645
13.2k
  size_t buflen;
646
13.2k
  uint32_t *out = NULL;
647
13.2k
  size_t outlen = 0;
648
13.2k
  int rc;
649
650
13.2k
  *output = NULL;
651
652
13.2k
  do
653
29.5k
    {
654
29.5k
      end = start;
655
656
431k
      for (; *end && !DOTP (*end); end++)
657
401k
  ;
658
659
29.5k
      buflen = (size_t) (end - start);
660
29.5k
      buf = malloc (sizeof (buf[0]) * (buflen + 1));
661
29.5k
      if (!buf)
662
0
  {
663
0
    free (out);
664
0
    return IDNA_MALLOC_ERROR;
665
0
  }
666
667
      /* don't check for non-malloc return codes as per
668
         specification! */
669
29.5k
      rc = idna_to_unicode_44i (start, (size_t) (end - start),
670
29.5k
        buf, &buflen, flags);
671
29.5k
      if (rc == IDNA_MALLOC_ERROR)
672
898
  {
673
898
    free (out);
674
898
    return IDNA_MALLOC_ERROR;
675
898
  }
676
677
28.6k
      if (out)
678
16.2k
  {
679
16.2k
    uint32_t *newp = realloc (out,
680
16.2k
            sizeof (out[0])
681
16.2k
            * (outlen + 1 + buflen + 1));
682
16.2k
    if (!newp)
683
0
      {
684
0
        free (buf);
685
0
        free (out);
686
0
        return IDNA_MALLOC_ERROR;
687
0
      }
688
16.2k
    out = newp;
689
16.2k
    out[outlen++] = 0x002E; /* '.' (full stop) */
690
16.2k
    memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
691
16.2k
    outlen += buflen;
692
16.2k
    out[outlen] = 0x0;
693
16.2k
    free (buf);
694
16.2k
  }
695
12.3k
      else
696
12.3k
  {
697
12.3k
    out = buf;
698
12.3k
    outlen = buflen;
699
12.3k
    out[outlen] = 0x0;
700
12.3k
  }
701
702
28.6k
      start = end + 1;
703
28.6k
    }
704
28.6k
  while (*end);
705
706
12.3k
  *output = out;
707
708
12.3k
  return IDNA_SUCCESS;
709
13.2k
}
710
711
/**
712
 * idna_to_unicode_8z4z:
713
 * @input: zero-terminated UTF-8 string.
714
 * @output: pointer to newly allocated output Unicode string.
715
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
716
 *   %IDNA_USE_STD3_ASCII_RULES.
717
 *
718
 * Convert possibly ACE encoded domain name in UTF-8 format into a
719
 * UCS-4 string.  The domain name may contain several labels,
720
 * separated by dots.  The output buffer must be deallocated by the
721
 * caller.
722
 *
723
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
724
 **/
725
int
726
idna_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
727
12.7k
{
728
12.7k
  uint32_t *ucs4;
729
12.7k
  size_t ucs4len;
730
12.7k
  int rc;
731
732
12.7k
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
733
12.7k
  if (!ucs4)
734
1.61k
    return IDNA_ICONV_ERROR;
735
736
11.1k
  rc = idna_to_unicode_4z4z (ucs4, output, flags);
737
11.1k
  free (ucs4);
738
739
11.1k
  return rc;
740
12.7k
}
741
742
/**
743
 * idna_to_unicode_8z8z:
744
 * @input: zero-terminated UTF-8 string.
745
 * @output: pointer to newly allocated output UTF-8 string.
746
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
747
 *   %IDNA_USE_STD3_ASCII_RULES.
748
 *
749
 * Convert possibly ACE encoded domain name in UTF-8 format into a
750
 * UTF-8 string.  The domain name may contain several labels,
751
 * separated by dots.  The output buffer must be deallocated by the
752
 * caller.
753
 *
754
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
755
 **/
756
int
757
idna_to_unicode_8z8z (const char *input, char **output, int flags)
758
10.6k
{
759
10.6k
  uint32_t *ucs4;
760
10.6k
  int rc;
761
762
10.6k
  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
763
10.6k
  if (rc != IDNA_SUCCESS)
764
1.14k
    return rc;
765
766
9.46k
  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
767
9.46k
  free (ucs4);
768
769
9.46k
  if (!*output)
770
0
    return IDNA_ICONV_ERROR;
771
772
9.46k
  return IDNA_SUCCESS;
773
9.46k
}
774
775
/**
776
 * idna_to_unicode_8zlz:
777
 * @input: zero-terminated UTF-8 string.
778
 * @output: pointer to newly allocated output string encoded in the
779
 *   current locale's character set.
780
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
781
 *   %IDNA_USE_STD3_ASCII_RULES.
782
 *
783
 * Convert possibly ACE encoded domain name in UTF-8 format into a
784
 * string encoded in the current locale's character set.  The domain
785
 * name may contain several labels, separated by dots.  The output
786
 * buffer must be deallocated by the caller.
787
 *
788
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
789
 **/
790
int
791
idna_to_unicode_8zlz (const char *input, char **output, int flags)
792
6.14k
{
793
6.14k
  char *utf8;
794
6.14k
  int rc;
795
796
6.14k
  rc = idna_to_unicode_8z8z (input, &utf8, flags);
797
6.14k
  if (rc != IDNA_SUCCESS)
798
570
    return rc;
799
800
5.57k
  *output = stringprep_utf8_to_locale (utf8);
801
5.57k
  free (utf8);
802
803
5.57k
  if (!*output)
804
2.31k
    return IDNA_ICONV_ERROR;
805
806
3.26k
  return IDNA_SUCCESS;
807
5.57k
}
808
809
/**
810
 * idna_to_unicode_lzlz:
811
 * @input: zero-terminated string encoded in the current locale's
812
 *   character set.
813
 * @output: pointer to newly allocated output string encoded in the
814
 *   current locale's character set.
815
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
816
 *   %IDNA_USE_STD3_ASCII_RULES.
817
 *
818
 * Convert possibly ACE encoded domain name in the locale's character
819
 * set into a string encoded in the current locale's character set.
820
 * The domain name may contain several labels, separated by dots.  The
821
 * output buffer must be deallocated by the caller.
822
 *
823
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
824
 **/
825
int
826
idna_to_unicode_lzlz (const char *input, char **output, int flags)
827
4.45k
{
828
4.45k
  char *utf8;
829
4.45k
  int rc;
830
831
4.45k
  utf8 = stringprep_locale_to_utf8 (input);
832
4.45k
  if (!utf8)
833
2.75k
    return IDNA_ICONV_ERROR;
834
835
1.69k
  rc = idna_to_unicode_8zlz (utf8, output, flags);
836
1.69k
  free (utf8);
837
838
1.69k
  return rc;
839
4.45k
}
840
841
/**
842
 * IDNA_ACE_PREFIX
843
 *
844
 * The IANA allocated prefix to use for IDNA. "xn--"
845
 */
846
847
/**
848
 * Idna_rc:
849
 * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
850
 *   always be zero, the remaining ones are only guaranteed to hold
851
 *   non-zero values, for logical comparison purposes.
852
 * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
853
 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
854
 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
855
 *   the string contains non-LDH ASCII characters.
856
 * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
857
 *   with typo in earlier versions.
858
 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
859
 *   the string contains a leading or trailing hyphen-minus (U+002D).
860
 * @IDNA_INVALID_LENGTH: The final output string is not within the
861
 *   (inclusive) range 1 to 63 characters.
862
 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
863
 *   (for ToUnicode).
864
 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
865
 *   string does not equal the input.
866
 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
867
 *   ToASCII).
868
 * @IDNA_ICONV_ERROR: Character encoding conversion error.
869
 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
870
 *   fatal error).
871
 * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
872
 *   internally in libc).
873
 *
874
 * Enumerated return codes of idna_to_ascii_4i(),
875
 * idna_to_unicode_44i() functions (and functions derived from those
876
 * functions).  The value 0 is guaranteed to always correspond to
877
 * success.
878
 */
879
880
881
/**
882
 * Idna_flags:
883
 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
884
 *   Unicode code points.
885
 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
886
 *   rules (i.e., normal host name rules).
887
 *
888
 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
889
 */