Coverage Report

Created: 2023-03-26 07:10

/src/libidn/lib/idna.c
Line
Count
Source (jump to first uncovered line)
1
/* idna.c --- Prototypes for Internationalized Domain Name library.
2
   Copyright (C) 2002-2023 Simon Josefsson
3
4
   This file is part of GNU Libidn.
5
6
   GNU Libidn is free software: you can redistribute it and/or
7
   modify it under the terms of either:
8
9
     * the GNU Lesser General Public License as published by the Free
10
       Software Foundation; either version 3 of the License, or (at
11
       your option) any later version.
12
13
   or
14
15
     * the GNU General Public License as published by the Free
16
       Software Foundation; either version 2 of the License, or (at
17
       your option) any later version.
18
19
   or both in parallel, as here.
20
21
   GNU Libidn is distributed in the hope that it will be useful,
22
   but WITHOUT ANY WARRANTY; without even the implied warranty of
23
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
   General Public License for more details.
25
26
   You should have received copies of the GNU General Public License and
27
   the GNU Lesser General Public License along with this program.  If
28
   not, see <https://www.gnu.org/licenses/>. */
29
30
#ifdef HAVE_CONFIG_H
31
# include "config.h"
32
#endif
33
34
#include <stdlib.h>
35
#include <string.h>
36
#include <stringprep.h>
37
#include <punycode.h>
38
39
#include "idna.h"
40
41
/* Get c_strcasecmp. */
42
#include <c-strcase.h>
43
44
506k
/* Nonzero when C is one of the four code points that must be
   recognized as label separators: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so it must not have side effects.  */
#define DOTP(c)						\
  ((c) == 0x002E || (c) == 0x3002			\
   || (c) == 0xFF0E || (c) == 0xFF61)
46
47
/* Core functions */
48
49
/**
50
 * idna_to_ascii_4i:
51
 * @in: input array with unicode code points.
52
 * @inlen: length of input array with unicode code points.
53
 * @out: output zero terminated string that must have room for at
54
 *       least 63 characters plus the terminating zero.
55
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
56
 *   %IDNA_USE_STD3_ASCII_RULES.
57
 *
58
 * The ToASCII operation takes a sequence of Unicode code points that
59
 * make up one domain label and transforms it into a sequence of code
60
 * points in the ASCII range (0..7F). If ToASCII succeeds, the
61
 * original sequence and the resulting sequence are equivalent labels.
62
 *
63
 * It is important to note that the ToASCII operation can fail. ToASCII
64
 * fails if any step of it fails. If any step of the ToASCII operation
65
 * fails on any label in a domain name, that domain name MUST NOT be used
66
 * as an internationalized domain name. The method for dealing with this
67
 * failure is application-specific.
68
 *
69
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
70
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
71
 * sequence of ASCII code points or a failure condition.
72
 *
73
 * ToASCII never alters a sequence of code points that are all in the ASCII
74
 * range to begin with (although it could fail). Applying the ToASCII
75
 * operation multiple times has exactly the same effect as applying it just
76
 * once.
77
 *
78
 * Return value: Returns 0 on success, or an #Idna_rc error code.
79
 */
80
int
81
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
82
12.5k
{
83
12.5k
  size_t len, outlen;
84
12.5k
  uint32_t *src;    /* XXX don't need to copy data? */
85
12.5k
  int rc;
86
87
  /*
88
   * ToASCII consists of the following steps:
89
   *
90
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
91
   * then skip to step 3.
92
   */
93
94
12.5k
  {
95
12.5k
    size_t i;
96
12.5k
    int inasciirange;
97
98
12.5k
    inasciirange = 1;
99
308k
    for (i = 0; i < inlen; i++)
100
295k
      if (in[i] > 0x7F)
101
246k
  inasciirange = 0;
102
12.5k
    if (inasciirange)
103
4.74k
      {
104
4.74k
  src = malloc (sizeof (in[0]) * (inlen + 1));
105
4.74k
  if (src == NULL)
106
0
    return IDNA_MALLOC_ERROR;
107
108
4.74k
  memcpy (src, in, sizeof (in[0]) * inlen);
109
4.74k
  src[inlen] = 0;
110
111
4.74k
  goto step3;
112
4.74k
      }
113
12.5k
  }
114
115
  /*
116
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
117
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
118
   */
119
120
7.78k
  {
121
7.78k
    char *p;
122
123
7.78k
    p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
124
7.78k
    if (p == NULL)
125
0
      return IDNA_MALLOC_ERROR;
126
127
7.78k
    len = strlen (p);
128
7.78k
    do
129
11.1k
      {
130
11.1k
  char *newp;
131
132
11.1k
  len = 2 * len + 10; /* XXX better guess? */
133
11.1k
  newp = realloc (p, len);
134
11.1k
  if (newp == NULL)
135
0
    {
136
0
      free (p);
137
0
      return IDNA_MALLOC_ERROR;
138
0
    }
139
11.1k
  p = newp;
140
141
11.1k
  if (flags & IDNA_ALLOW_UNASSIGNED)
142
5.74k
    rc = stringprep_nameprep (p, len);
143
5.42k
  else
144
5.42k
    rc = stringprep_nameprep_no_unassigned (p, len);
145
11.1k
      }
146
11.1k
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);
147
148
7.78k
    if (rc != STRINGPREP_OK)
149
2.28k
      {
150
2.28k
  free (p);
151
2.28k
  return IDNA_STRINGPREP_ERROR;
152
2.28k
      }
153
154
5.49k
    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
155
156
5.49k
    free (p);
157
158
5.49k
    if (!src)
159
0
      return IDNA_MALLOC_ERROR;
160
5.49k
  }
161
162
10.2k
step3:
163
  /*
164
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
165
   *
166
   * (a) Verify the absence of non-LDH ASCII code points; that is,
167
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
168
   *
169
   * (b) Verify the absence of leading and trailing hyphen-minus;
170
   * that is, the absence of U+002D at the beginning and end of
171
   * the sequence.
172
   */
173
174
10.2k
  if (flags & IDNA_USE_STD3_ASCII_RULES)
175
5.45k
    {
176
5.45k
      size_t i;
177
178
105k
      for (i = 0; src[i]; i++)
179
102k
  if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
180
102k
      (src[i] >= 0x3A && src[i] <= 0x40) ||
181
102k
      (src[i] >= 0x5B && src[i] <= 0x60) ||
182
102k
      (src[i] >= 0x7B && src[i] <= 0x7F))
183
2.31k
    {
184
2.31k
      free (src);
185
2.31k
      return IDNA_CONTAINS_NON_LDH;
186
2.31k
    }
187
188
3.14k
      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
189
465
  {
190
465
    free (src);
191
465
    return IDNA_CONTAINS_MINUS;
192
465
  }
193
3.14k
    }
194
195
  /*
196
   * 4. If all code points in the sequence are in the ASCII range
197
   * (0..7F), then skip to step 8.
198
   */
199
200
7.46k
  {
201
7.46k
    size_t i;
202
7.46k
    int inasciirange;
203
204
7.46k
    inasciirange = 1;
205
931k
    for (i = 0; src[i]; i++)
206
924k
      {
207
924k
  if (src[i] > 0x7F)
208
768k
    inasciirange = 0;
209
  /* copy string to output buffer if we are about to skip to step8 */
210
924k
  if (i < 64)
211
101k
    out[i] = src[i];
212
924k
      }
213
7.46k
    if (i < 64)
214
6.90k
      out[i] = '\0';
215
560
    else
216
560
      {
217
560
  free (src);
218
560
  return IDNA_INVALID_LENGTH;
219
560
      }
220
6.90k
    if (inasciirange)
221
3.50k
      goto step8;
222
6.90k
  }
223
224
  /*
225
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
226
   *
227
   */
228
229
3.40k
  {
230
3.40k
    size_t i;
231
3.40k
    int match;
232
233
3.40k
    match = 1;
234
7.71k
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
235
4.31k
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
236
3.13k
  match = 0;
237
3.40k
    if (match)
238
262
      {
239
262
  free (src);
240
262
  return IDNA_CONTAINS_ACE_PREFIX;
241
262
      }
242
3.40k
  }
243
244
  /*
245
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
246
   * and fail if there is an error.
247
   */
248
62.3k
  for (len = 0; src[len]; len++)
249
59.1k
    ;
250
3.13k
  src[len] = '\0';
251
3.13k
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
252
3.13k
  rc = punycode_encode (len, src, NULL,
253
3.13k
      &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
254
3.13k
  if (rc != PUNYCODE_SUCCESS)
255
655
    {
256
655
      free (src);
257
655
      return IDNA_PUNYCODE_ERROR;
258
655
    }
259
2.48k
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
260
261
  /*
262
   * 7. Prepend the ACE prefix.
263
   */
264
265
2.48k
  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
266
267
  /*
268
   * 8. Verify that the number of code points is in the range 1 to 63
269
   * inclusive (0 is excluded).
270
   */
271
272
5.99k
step8:
273
5.99k
  free (src);
274
5.99k
  if (strlen (out) < 1)
275
802
    return IDNA_INVALID_LENGTH;
276
277
5.18k
  return IDNA_SUCCESS;
278
5.99k
}
279
280
/* ToUnicode().  May realloc() utf8in.  Will free utf8in unconditionally. */
281
static int
282
idna_to_unicode_internal (char *utf8in,
283
        uint32_t * out, size_t *outlen, int flags)
284
35.2k
{
285
35.2k
  int rc;
286
35.2k
  char tmpout[64];
287
35.2k
  size_t utf8len = strlen (utf8in) + 1;
288
35.2k
  size_t addlen = 0, addinc = utf8len / 10 + 1;
289
290
  /*
291
   * ToUnicode consists of the following steps:
292
   *
293
   * 1. If the sequence contains any code points outside the ASCII range
294
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
295
   */
296
297
35.2k
  {
298
35.2k
    size_t i;
299
35.2k
    int inasciirange;
300
301
35.2k
    inasciirange = 1;
302
695k
    for (i = 0; utf8in[i]; i++)
303
660k
      if (utf8in[i] & ~0x7F)
304
214k
  inasciirange = 0;
305
35.2k
    if (inasciirange)
306
21.2k
      goto step3;
307
35.2k
  }
308
309
  /*
310
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
311
   * error. (If step 3 of ToASCII is also performed here, it will not
312
   * affect the overall behavior of ToUnicode, but it is not
313
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
314
   */
315
14.0k
  do
316
26.2k
    {
317
26.2k
      char *newp = realloc (utf8in, utf8len + addlen);
318
26.2k
      if (newp == NULL)
319
0
  {
320
0
    free (utf8in);
321
0
    return IDNA_MALLOC_ERROR;
322
0
  }
323
26.2k
      utf8in = newp;
324
26.2k
      if (flags & IDNA_ALLOW_UNASSIGNED)
325
13.8k
  rc = stringprep_nameprep (utf8in, utf8len + addlen);
326
12.3k
      else
327
12.3k
  rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
328
26.2k
      addlen += addinc;
329
26.2k
      addinc *= 2;
330
26.2k
    }
331
26.2k
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
332
333
14.0k
  if (rc != STRINGPREP_OK)
334
6.27k
    {
335
6.27k
      free (utf8in);
336
6.27k
      return IDNA_STRINGPREP_ERROR;
337
6.27k
    }
338
339
  /* 3. Verify that the sequence begins with the ACE prefix, and save a
340
   * copy of the sequence.
341
   * ... The ToASCII and ToUnicode operations MUST recognize the ACE
342
   prefix in a case-insensitive manner.
343
   */
344
345
29.0k
step3:
346
29.0k
  if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0)
347
12.8k
    {
348
12.8k
      free (utf8in);
349
12.8k
      return IDNA_NO_ACE_PREFIX;
350
12.8k
    }
351
352
  /* 4. Remove the ACE prefix.
353
   */
354
355
16.2k
  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
356
16.2k
     strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
357
358
  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
359
   * and fail if there is an error. Save a copy of the result of
360
   * this step.
361
   */
362
363
16.2k
  (*outlen)--;      /* reserve one for the zero */
364
365
16.2k
  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
366
16.2k
  if (rc != PUNYCODE_SUCCESS)
367
3.67k
    {
368
3.67k
      free (utf8in);
369
3.67k
      return IDNA_PUNYCODE_ERROR;
370
3.67k
    }
371
372
12.5k
  out[*outlen] = 0;   /* add zero */
373
374
  /* 6. Apply ToASCII.
375
   */
376
377
12.5k
  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
378
12.5k
  if (rc != IDNA_SUCCESS)
379
7.33k
    {
380
7.33k
      free (utf8in);
381
7.33k
      return rc;
382
7.33k
    }
383
384
  /* 7. Verify that the result of step 6 matches the saved copy from
385
   * step 3, using a case-insensitive ASCII comparison.
386
   */
387
388
5.18k
  if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
389
4.38k
    {
390
4.38k
      free (utf8in);
391
4.38k
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
392
4.38k
    }
393
394
  /* 8. Return the saved copy from step 5.
395
   */
396
397
806
  free (utf8in);
398
806
  return IDNA_SUCCESS;
399
5.18k
}
400
401
/**
402
 * idna_to_unicode_44i:
403
 * @in: input array with unicode code points.
404
 * @inlen: length of input array with unicode code points.
405
 * @out: output array with unicode code points.
406
 * @outlen: on input, maximum size of output array with unicode code points,
407
 *          on exit, actual size of output array with unicode code points.
408
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
409
 *   %IDNA_USE_STD3_ASCII_RULES.
410
 *
411
 * The ToUnicode operation takes a sequence of Unicode code points
412
 * that make up one domain label and returns a sequence of Unicode
413
 * code points. If the input sequence is a label in ACE form, then the
414
 * result is an equivalent internationalized label that is not in ACE
415
 * form, otherwise the original sequence is returned unaltered.
416
 *
417
 * ToUnicode never fails. If any step fails, then the original input
418
 * sequence is returned immediately in that step.
419
 *
420
 * The Punycode decoder can never output more code points than it
421
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
422
 * the number of octets needed to represent a sequence of code points
423
 * depends on the particular character encoding used.
424
 *
425
 * The inputs to ToUnicode are a sequence of code points, the
426
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
427
 * ToUnicode is always a sequence of Unicode code points.
428
 *
429
 * Return value: Returns #Idna_rc error condition, but it must only be
430
 *   used for debugging purposes.  The output buffer is always
431
 *   guaranteed to contain the correct data according to the
432
 *   specification (sans malloc induced errors).  NB!  This means that
433
 *   you normally ignore the return code from this function, as
434
 *   checking it means breaking the standard.
435
 */
436
int
437
idna_to_unicode_44i (const uint32_t * in, size_t inlen,
438
         uint32_t * out, size_t *outlen, int flags)
439
37.6k
{
440
37.6k
  int rc;
441
37.6k
  size_t outlensave = *outlen;
442
37.6k
  char *p;
443
444
37.6k
  p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
445
37.6k
  if (p == NULL)
446
2.34k
    return IDNA_MALLOC_ERROR;
447
448
35.2k
  rc = idna_to_unicode_internal (p, out, outlen, flags);
449
35.2k
  if (rc != IDNA_SUCCESS)
450
34.4k
    {
451
34.4k
      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
452
34.4k
           inlen : outlensave));
453
34.4k
      *outlen = inlen;
454
34.4k
    }
455
456
  /* p is freed in idna_to_unicode_internal.  */
457
458
35.2k
  return rc;
459
37.6k
}
460
461
/* Wrappers that handle several labels */
462
463
/**
464
 * idna_to_ascii_4z:
465
 * @input: zero terminated input Unicode string.
466
 * @output: pointer to newly allocated output string.
467
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
468
 *   %IDNA_USE_STD3_ASCII_RULES.
469
 *
470
 * Convert UCS-4 domain name to ASCII string.  The domain name may
471
 * contain several labels, separated by dots.  The output buffer must
472
 * be deallocated by the caller.
473
 *
474
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
475
 **/
476
int
477
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
478
0
{
479
0
  const uint32_t *start = input;
480
0
  const uint32_t *end;
481
0
  char buf[64];
482
0
  char *out = NULL;
483
0
  int rc;
484
485
  /* 1) Whenever dots are used as label separators, the following
486
     characters MUST be recognized as dots: U+002E (full stop),
487
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
488
     U+FF61 (halfwidth ideographic full stop). */
489
490
0
  if (input[0] == 0)
491
0
    {
492
      /* Handle implicit zero-length root label. */
493
0
      *output = malloc (1);
494
0
      if (!*output)
495
0
  return IDNA_MALLOC_ERROR;
496
0
      strcpy (*output, "");
497
0
      return IDNA_SUCCESS;
498
0
    }
499
500
0
  if (DOTP (input[0]) && input[1] == 0)
501
0
    {
502
      /* Handle explicit zero-length root label. */
503
0
      *output = malloc (2);
504
0
      if (!*output)
505
0
  return IDNA_MALLOC_ERROR;
506
0
      strcpy (*output, ".");
507
0
      return IDNA_SUCCESS;
508
0
    }
509
510
0
  *output = NULL;
511
0
  do
512
0
    {
513
0
      end = start;
514
515
0
      for (; *end && !DOTP (*end); end++)
516
0
  ;
517
518
0
      if (*end == '\0' && start == end)
519
0
  {
520
    /* Handle explicit zero-length root label. */
521
0
    buf[0] = '\0';
522
0
  }
523
0
      else
524
0
  {
525
0
    rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
526
0
    if (rc != IDNA_SUCCESS)
527
0
      {
528
0
        free (out);
529
0
        return rc;
530
0
      }
531
0
  }
532
533
0
      if (out)
534
0
  {
535
0
    size_t l = strlen (out) + 1 + strlen (buf) + 1;
536
0
    char *newp = realloc (out, l);
537
0
    if (!newp)
538
0
      {
539
0
        free (out);
540
0
        return IDNA_MALLOC_ERROR;
541
0
      }
542
0
    out = newp;
543
0
    strcat (out, ".");
544
0
    strcat (out, buf);
545
0
  }
546
0
      else
547
0
  {
548
0
    out = strdup (buf);
549
0
    if (!out)
550
0
      return IDNA_MALLOC_ERROR;
551
0
  }
552
553
0
      start = end + 1;
554
0
    }
555
0
  while (*end);
556
557
0
  *output = out;
558
559
0
  return IDNA_SUCCESS;
560
0
}
561
562
/**
563
 * idna_to_ascii_8z:
564
 * @input: zero terminated input UTF-8 string.
565
 * @output: pointer to newly allocated output string.
566
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
567
 *   %IDNA_USE_STD3_ASCII_RULES.
568
 *
569
 * Convert UTF-8 domain name to ASCII string.  The domain name may
570
 * contain several labels, separated by dots.  The output buffer must
571
 * be deallocated by the caller.
572
 *
573
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
574
 **/
575
int
576
idna_to_ascii_8z (const char *input, char **output, int flags)
577
0
{
578
0
  uint32_t *ucs4;
579
0
  size_t ucs4len;
580
0
  int rc;
581
582
0
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
583
0
  if (!ucs4)
584
0
    return IDNA_ICONV_ERROR;
585
586
0
  rc = idna_to_ascii_4z (ucs4, output, flags);
587
588
0
  free (ucs4);
589
590
0
  return rc;
591
592
0
}
593
594
/**
595
 * idna_to_ascii_lz:
596
 * @input: zero terminated input string encoded in the current locale's
597
 *   character set.
598
 * @output: pointer to newly allocated output string.
599
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
600
 *   %IDNA_USE_STD3_ASCII_RULES.
601
 *
602
 * Convert domain name in the locale's encoding to ASCII string.  The
603
 * domain name may contain several labels, separated by dots.  The
604
 * output buffer must be deallocated by the caller.
605
 *
606
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
607
 **/
608
int
609
idna_to_ascii_lz (const char *input, char **output, int flags)
610
0
{
611
0
  char *utf8;
612
0
  int rc;
613
614
0
  utf8 = stringprep_locale_to_utf8 (input);
615
0
  if (!utf8)
616
0
    return IDNA_ICONV_ERROR;
617
618
0
  rc = idna_to_ascii_8z (utf8, output, flags);
619
620
0
  free (utf8);
621
622
0
  return rc;
623
0
}
624
625
/**
626
 * idna_to_unicode_4z4z:
627
 * @input: zero-terminated Unicode string.
628
 * @output: pointer to newly allocated output Unicode string.
629
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
630
 *   %IDNA_USE_STD3_ASCII_RULES.
631
 *
632
 * Convert possibly ACE encoded domain name in UCS-4 format into a
633
 * UCS-4 string.  The domain name may contain several labels,
634
 * separated by dots.  The output buffer must be deallocated by the
635
 * caller.
636
 *
637
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
638
 **/
639
int
640
idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
641
15.4k
{
642
15.4k
  const uint32_t *start = input;
643
15.4k
  const uint32_t *end;
644
15.4k
  uint32_t *buf;
645
15.4k
  size_t buflen;
646
15.4k
  uint32_t *out = NULL;
647
15.4k
  size_t outlen = 0;
648
649
15.4k
  *output = NULL;
650
651
15.4k
  do
652
35.1k
    {
653
35.1k
      end = start;
654
655
521k
      for (; *end && !DOTP (*end); end++)
656
486k
  ;
657
658
35.1k
      buflen = (size_t) (end - start);
659
35.1k
      buf = malloc (sizeof (buf[0]) * (buflen + 1));
660
35.1k
      if (!buf)
661
0
  {
662
0
    free (out);
663
0
    return IDNA_MALLOC_ERROR;
664
0
  }
665
666
      /* don't check return code as per specification! */
667
35.1k
      idna_to_unicode_44i (start, (size_t) (end - start),
668
35.1k
         buf, &buflen, flags);
669
670
35.1k
      if (out)
671
19.6k
  {
672
19.6k
    uint32_t *newp = realloc (out,
673
19.6k
            sizeof (out[0])
674
19.6k
            * (outlen + 1 + buflen + 1));
675
19.6k
    if (!newp)
676
0
      {
677
0
        free (buf);
678
0
        free (out);
679
0
        return IDNA_MALLOC_ERROR;
680
0
      }
681
19.6k
    out = newp;
682
19.6k
    out[outlen++] = 0x002E; /* '.' (full stop) */
683
19.6k
    memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
684
19.6k
    outlen += buflen;
685
19.6k
    out[outlen] = 0x0;
686
19.6k
    free (buf);
687
19.6k
  }
688
15.4k
      else
689
15.4k
  {
690
15.4k
    out = buf;
691
15.4k
    outlen = buflen;
692
15.4k
    out[outlen] = 0x0;
693
15.4k
  }
694
695
35.1k
      start = end + 1;
696
35.1k
    }
697
35.1k
  while (*end);
698
699
15.4k
  *output = out;
700
701
15.4k
  return IDNA_SUCCESS;
702
15.4k
}
703
704
/**
705
 * idna_to_unicode_8z4z:
706
 * @input: zero-terminated UTF-8 string.
707
 * @output: pointer to newly allocated output Unicode string.
708
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
709
 *   %IDNA_USE_STD3_ASCII_RULES.
710
 *
711
 * Convert possibly ACE encoded domain name in UTF-8 format into a
712
 * UCS-4 string.  The domain name may contain several labels,
713
 * separated by dots.  The output buffer must be deallocated by the
714
 * caller.
715
 *
716
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
717
 **/
718
int
719
idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
720
14.9k
{
721
14.9k
  uint32_t *ucs4;
722
14.9k
  size_t ucs4len;
723
14.9k
  int rc;
724
725
14.9k
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
726
14.9k
  if (!ucs4)
727
1.97k
    return IDNA_ICONV_ERROR;
728
729
12.9k
  rc = idna_to_unicode_4z4z (ucs4, output, flags);
730
12.9k
  free (ucs4);
731
732
12.9k
  return rc;
733
14.9k
}
734
735
/**
736
 * idna_to_unicode_8z8z:
737
 * @input: zero-terminated UTF-8 string.
738
 * @output: pointer to newly allocated output UTF-8 string.
739
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
740
 *   %IDNA_USE_STD3_ASCII_RULES.
741
 *
742
 * Convert possibly ACE encoded domain name in UTF-8 format into a
743
 * UTF-8 string.  The domain name may contain several labels,
744
 * separated by dots.  The output buffer must be deallocated by the
745
 * caller.
746
 *
747
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
748
 **/
749
int
750
idna_to_unicode_8z8z (const char *input, char **output, int flags)
751
12.4k
{
752
12.4k
  uint32_t *ucs4;
753
12.4k
  int rc;
754
755
12.4k
  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
756
12.4k
  if (rc != IDNA_SUCCESS)
757
1.37k
    return rc;
758
759
11.0k
  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
760
11.0k
  free (ucs4);
761
762
11.0k
  if (!*output)
763
0
    return IDNA_ICONV_ERROR;
764
765
11.0k
  return IDNA_SUCCESS;
766
11.0k
}
767
768
/**
769
 * idna_to_unicode_8zlz:
770
 * @input: zero-terminated UTF-8 string.
771
 * @output: pointer to newly allocated output string encoded in the
772
 *   current locale's character set.
773
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
774
 *   %IDNA_USE_STD3_ASCII_RULES.
775
 *
776
 * Convert possibly ACE encoded domain name in UTF-8 format into a
777
 * string encoded in the current locale's character set.  The domain
778
 * name may contain several labels, separated by dots.  The output
779
 * buffer must be deallocated by the caller.
780
 *
781
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
782
 **/
783
int
784
idna_to_unicode_8zlz (const char *input, char **output, int flags)
785
7.23k
{
786
7.23k
  char *utf8;
787
7.23k
  int rc;
788
789
7.23k
  rc = idna_to_unicode_8z8z (input, &utf8, flags);
790
7.23k
  if (rc != IDNA_SUCCESS)
791
688
    return rc;
792
793
6.54k
  *output = stringprep_utf8_to_locale (utf8);
794
6.54k
  free (utf8);
795
796
6.54k
  if (!*output)
797
2.74k
    return IDNA_ICONV_ERROR;
798
799
3.79k
  return IDNA_SUCCESS;
800
6.54k
}
801
802
/**
803
 * idna_to_unicode_lzlz:
804
 * @input: zero-terminated string encoded in the current locale's
805
 *   character set.
806
 * @output: pointer to newly allocated output string encoded in the
807
 *   current locale's character set.
808
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
809
 *   %IDNA_USE_STD3_ASCII_RULES.
810
 *
811
 * Convert possibly ACE encoded domain name in the locale's character
812
 * set into a string encoded in the current locale's character set.
813
 * The domain name may contain several labels, separated by dots.  The
814
 * output buffer must be deallocated by the caller.
815
 *
816
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
817
 **/
818
int
819
idna_to_unicode_lzlz (const char *input, char **output, int flags)
820
5.23k
{
821
5.23k
  char *utf8;
822
5.23k
  int rc;
823
824
5.23k
  utf8 = stringprep_locale_to_utf8 (input);
825
5.23k
  if (!utf8)
826
3.22k
    return IDNA_ICONV_ERROR;
827
828
2.00k
  rc = idna_to_unicode_8zlz (utf8, output, flags);
829
2.00k
  free (utf8);
830
831
2.00k
  return rc;
832
5.23k
}
833
834
/**
835
 * IDNA_ACE_PREFIX
836
 *
837
 * The IANA allocated prefix to use for IDNA. "xn--"
838
 */
839
840
/**
841
 * Idna_rc:
842
 * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
843
 *   always be zero, the remaining ones are only guaranteed to hold
844
 *   non-zero values, for logical comparison purposes.
845
 * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
846
 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
847
 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
848
 *   the string contains non-LDH ASCII characters.
849
 * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
850
 *   with typo in earlier versions.
851
 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
852
 *   the string contains a leading or trailing hyphen-minus (U+002D).
853
 * @IDNA_INVALID_LENGTH: The final output string is not within the
854
 *   (inclusive) range 1 to 63 characters.
855
 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
856
 *   (for ToUnicode).
857
 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
858
 *   string does not equal the input.
859
 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
860
 *   ToASCII).
861
 * @IDNA_ICONV_ERROR: Character encoding conversion error.
862
 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
863
 *   fatal error).
864
 * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
865
 *   internally in libc).
866
 *
867
 * Enumerated return codes of idna_to_ascii_4i(),
868
 * idna_to_unicode_44i() functions (and functions derived from those
869
 * functions).  The value 0 is guaranteed to always correspond to
870
 * success.
871
 */
872
873
874
/**
875
 * Idna_flags:
876
 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
877
 *   Unicode code points.
878
 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
879
 *   rules (i.e., normal host name rules).
880
 *
881
 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
882
 */