Coverage Report

Created: 2026-06-30 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn/lib/idna.c
Line
Count
Source
1
/* idna.c --- Implementation of the Internationalized Domain Name library.
2
   Copyright (C) 2002-2026 Simon Josefsson
3
4
   This file is part of GNU Libidn.
5
6
   GNU Libidn is free software: you can redistribute it and/or
7
   modify it under the terms of either:
8
9
     * the GNU Lesser General Public License as published by the Free
10
       Software Foundation; either version 3 of the License, or (at
11
       your option) any later version.
12
13
   or
14
15
     * the GNU General Public License as published by the Free
16
       Software Foundation; either version 2 of the License, or (at
17
       your option) any later version.
18
19
   or both in parallel, as here.
20
21
   GNU Libidn is distributed in the hope that it will be useful,
22
   but WITHOUT ANY WARRANTY; without even the implied warranty of
23
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
   General Public License for more details.
25
26
   You should have received copies of the GNU General Public License and
27
   the GNU Lesser General Public License along with this program.  If
28
   not, see <https://www.gnu.org/licenses/>. */
29
30
#ifdef HAVE_CONFIG_H
31
# include "config.h"
32
#endif
33
34
#include <stdlib.h>
35
#include <string.h>
36
#include <stringprep.h>
37
#include <punycode.h>
38
39
#include "idna.h"
40
41
/* Get c_strcasecmp. */
42
#include <c-strcase.h>
43
44
78.2k
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
45
73.0k
     (c) == 0xFF0E || (c) == 0xFF61)
46
47
/* Core functions */
48
49
/**
50
 * idna_to_ascii_4i:
51
 * @in: input array with unicode code points.
52
 * @inlen: length of input array with unicode code points.
53
 * @out: output zero terminated string that must have room for at
54
 *       least 63 characters plus the terminating zero.
55
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
56
 *   %IDNA_USE_STD3_ASCII_RULES.
57
 *
58
 * The ToASCII operation takes a sequence of Unicode code points that
59
 * make up one domain label and transforms it into a sequence of code
60
 * points in the ASCII range (0..7F). If ToASCII succeeds, the
61
 * original sequence and the resulting sequence are equivalent labels.
62
 *
63
 * It is important to note that the ToASCII operation can fail. ToASCII
64
 * fails if any step of it fails. If any step of the ToASCII operation
65
 * fails on any label in a domain name, that domain name MUST NOT be used
66
 * as an internationalized domain name. The method for dealing with this
67
 * failure is application-specific.
68
 *
69
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
70
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
71
 * sequence of ASCII code points or a failure condition.
72
 *
73
 * ToASCII never alters a sequence of code points that are all in the ASCII
74
 * range to begin with (although it could fail). Applying the ToASCII
75
 * operation multiple times has exactly the same effect as applying it just
76
 * once.
77
 *
78
 * Return value: Returns 0 on success, or an #Idna_rc error code.
79
 */
80
int
81
idna_to_ascii_4i (const uint32_t *in, size_t inlen, char *out, int flags)
82
10.6k
{
83
10.6k
  size_t len, outlen;
84
10.6k
  uint32_t *src;    /* XXX don't need to copy data? */
85
10.6k
  int rc;
86
87
  /*
88
   * ToASCII consists of the following steps:
89
   *
90
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
91
   * then skip to step 3.
92
   */
93
94
10.6k
  {
95
10.6k
    size_t i;
96
10.6k
    int inasciirange;
97
98
10.6k
    inasciirange = 1;
99
81.5k
    for (i = 0; i < inlen; i++)
100
70.8k
      if (in[i] > 0x7F)
101
31.7k
  inasciirange = 0;
102
10.6k
    if (inasciirange)
103
2.82k
      {
104
2.82k
  src = malloc (sizeof (in[0]) * (inlen + 1));
105
2.82k
  if (src == NULL)
106
0
    return IDNA_MALLOC_ERROR;
107
108
2.82k
  memcpy (src, in, sizeof (in[0]) * inlen);
109
2.82k
  src[inlen] = 0;
110
111
2.82k
  goto step3;
112
2.82k
      }
113
10.6k
  }
114
115
  /*
116
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
117
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
118
   */
119
120
7.82k
  {
121
7.82k
    char *p;
122
123
7.82k
    p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
124
7.82k
    if (p == NULL)
125
1.51k
      return IDNA_MALLOC_ERROR;
126
127
6.30k
    len = strlen (p);
128
6.30k
    do
129
8.02k
      {
130
8.02k
  char *newp;
131
132
8.02k
  len = 2 * len + 10; /* XXX better guess? */
133
8.02k
  newp = realloc (p, len);
134
8.02k
  if (newp == NULL)
135
0
    {
136
0
      free (p);
137
0
      return IDNA_MALLOC_ERROR;
138
0
    }
139
8.02k
  p = newp;
140
141
8.02k
  if (flags & IDNA_ALLOW_UNASSIGNED)
142
3.83k
    rc = stringprep_nameprep (p, len);
143
4.19k
  else
144
4.19k
    rc = stringprep_nameprep_no_unassigned (p, len);
145
8.02k
      }
146
8.02k
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);
147
148
6.30k
    if (rc != STRINGPREP_OK)
149
1.77k
      {
150
1.77k
  free (p);
151
1.77k
  return IDNA_STRINGPREP_ERROR;
152
1.77k
      }
153
154
4.53k
    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
155
156
4.53k
    free (p);
157
158
4.53k
    if (!src)
159
0
      return IDNA_MALLOC_ERROR;
160
4.53k
  }
161
162
7.35k
step3:
163
  /*
164
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
165
   *
166
   * (a) Verify the absence of non-LDH ASCII code points; that is,
167
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
168
   *
169
   * (b) Verify the absence of leading and trailing hyphen-minus;
170
   * that is, the absence of U+002D at the beginning and end of
171
   * the sequence.
172
   */
173
174
7.35k
  if (flags & IDNA_USE_STD3_ASCII_RULES)
175
3.64k
    {
176
3.64k
      size_t i;
177
178
19.3k
      for (i = 0; src[i]; i++)
179
16.5k
  if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
180
15.9k
      (src[i] >= 0x3A && src[i] <= 0x40) ||
181
15.9k
      (src[i] >= 0x5B && src[i] <= 0x60) ||
182
15.8k
      (src[i] >= 0x7B && src[i] <= 0x7F))
183
784
    {
184
784
      free (src);
185
784
      return IDNA_CONTAINS_NON_LDH;
186
784
    }
187
188
2.86k
      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
189
47
  {
190
47
    free (src);
191
47
    return IDNA_CONTAINS_MINUS;
192
47
  }
193
2.86k
    }
194
195
  /*
196
   * 4. If all code points in the sequence are in the ASCII range
197
   * (0..7F), then skip to step 8.
198
   */
199
200
6.52k
  {
201
6.52k
    size_t i;
202
6.52k
    int inasciirange;
203
204
6.52k
    inasciirange = 1;
205
98.5k
    for (i = 0; src[i]; i++)
206
92.0k
      {
207
92.0k
  if (src[i] > 0x7F)
208
58.4k
    inasciirange = 0;
209
  /* copy string to output buffer if we are about to skip to step8 */
210
92.0k
  if (i < 64)
211
30.8k
    out[i] = src[i];
212
92.0k
      }
213
6.52k
    if (i < 64)
214
6.41k
      out[i] = '\0';
215
111
    else
216
111
      {
217
111
  free (src);
218
111
  return IDNA_INVALID_LENGTH;
219
111
      }
220
6.41k
    if (inasciirange)
221
2.43k
      goto step8;
222
6.41k
  }
223
224
  /*
225
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
226
   *
227
   */
228
229
3.98k
  {
230
3.98k
    size_t i;
231
3.98k
    int match;
232
233
3.98k
    match = 1;
234
8.42k
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
235
4.44k
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
236
3.96k
  match = 0;
237
3.98k
    if (match)
238
18
      {
239
18
  free (src);
240
18
  return IDNA_CONTAINS_ACE_PREFIX;
241
18
      }
242
3.98k
  }
243
244
  /*
245
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
246
   * and fail if there is an error.
247
   */
248
24.2k
  for (len = 0; src[len]; len++)
249
20.3k
    ;
250
3.96k
  src[len] = '\0';
251
3.96k
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
252
3.96k
  rc = punycode_encode (len, src, NULL,
253
3.96k
      &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
254
3.96k
  if (rc != PUNYCODE_SUCCESS)
255
55
    {
256
55
      free (src);
257
55
      return IDNA_PUNYCODE_ERROR;
258
55
    }
259
3.90k
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
260
261
  /*
262
   * 7. Prepend the ACE prefix.
263
   */
264
265
3.90k
  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
266
267
  /*
268
   * 8. Verify that the number of code points is in the range 1 to 63
269
   * inclusive (0 is excluded).
270
   */
271
272
6.34k
step8:
273
6.34k
  free (src);
274
6.34k
  if (strlen (out) < 1)
275
205
    return IDNA_INVALID_LENGTH;
276
277
6.13k
  return IDNA_SUCCESS;
278
6.34k
}
279
280
/* ToUnicode().  May realloc() utf8in.  Will free utf8in unconditionally. */
281
static int
282
idna_to_unicode_internal (char *utf8in,
283
        uint32_t *out, size_t *outlen, int flags)
284
0
{
285
0
  int rc;
286
0
  char tmpout[64];
287
0
  size_t utf8len = strlen (utf8in) + 1;
288
0
  size_t addlen = 0, addinc = utf8len / 10 + 1;
289
290
  /*
291
   * ToUnicode consists of the following steps:
292
   *
293
   * 1. If the sequence contains any code points outside the ASCII range
294
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
295
   */
296
297
0
  {
298
0
    size_t i;
299
0
    int inasciirange;
300
301
0
    inasciirange = 1;
302
0
    for (i = 0; utf8in[i]; i++)
303
0
      if (utf8in[i] & ~0x7F)
304
0
  inasciirange = 0;
305
0
    if (inasciirange)
306
0
      goto step3;
307
0
  }
308
309
  /*
310
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
311
   * error. (If step 3 of ToASCII is also performed here, it will not
312
   * affect the overall behavior of ToUnicode, but it is not
313
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
314
   */
315
0
  do
316
0
    {
317
0
      char *newp = realloc (utf8in, utf8len + addlen);
318
0
      if (newp == NULL)
319
0
  {
320
0
    free (utf8in);
321
0
    return IDNA_MALLOC_ERROR;
322
0
  }
323
0
      utf8in = newp;
324
0
      if (flags & IDNA_ALLOW_UNASSIGNED)
325
0
  rc = stringprep_nameprep (utf8in, utf8len + addlen);
326
0
      else
327
0
  rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
328
0
      addlen += addinc;
329
0
      addinc *= 2;
330
0
    }
331
0
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
332
333
0
  if (rc != STRINGPREP_OK)
334
0
    {
335
0
      free (utf8in);
336
0
      return IDNA_STRINGPREP_ERROR;
337
0
    }
338
339
  /* 3. Verify that the sequence begins with the ACE prefix, and save a
340
   * copy of the sequence.
341
   * ... The ToASCII and ToUnicode operations MUST recognize the ACE
342
   * prefix in a case-insensitive manner.
343
   */
344
345
0
step3:
346
0
  if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0)
347
0
    {
348
0
      free (utf8in);
349
0
      return IDNA_NO_ACE_PREFIX;
350
0
    }
351
352
  /* 4. Remove the ACE prefix.
353
   */
354
355
0
  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
356
0
     strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
357
358
  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
359
   * and fail if there is an error. Save a copy of the result of
360
   * this step.
361
   */
362
363
0
  (*outlen)--;      /* reserve one for the zero */
364
365
0
  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
366
0
  if (rc != PUNYCODE_SUCCESS)
367
0
    {
368
0
      free (utf8in);
369
0
      return IDNA_PUNYCODE_ERROR;
370
0
    }
371
372
0
  out[*outlen] = 0;   /* add zero */
373
374
  /* 6. Apply ToASCII.
375
   */
376
377
0
  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
378
0
  if (rc != IDNA_SUCCESS)
379
0
    {
380
0
      free (utf8in);
381
0
      return rc;
382
0
    }
383
384
  /* 7. Verify that the result of step 6 matches the saved copy from
385
   * step 3, using a case-insensitive ASCII comparison.
386
   */
387
388
0
  if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0
389
0
      || c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
390
0
    {
391
0
      free (utf8in);
392
0
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
393
0
    }
394
395
  /* 8. Return the saved copy from step 5.
396
   */
397
398
0
  free (utf8in);
399
0
  return IDNA_SUCCESS;
400
0
}
401
402
/**
403
 * idna_to_unicode_44i:
404
 * @in: input array with unicode code points.
405
 * @inlen: length of input array with unicode code points.
406
 * @out: output array with unicode code points.
407
 * @outlen: on input, maximum size of output array with unicode code points,
408
 *          on exit, actual size of output array with unicode code points.
409
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
410
 *   %IDNA_USE_STD3_ASCII_RULES.
411
 *
412
 * The ToUnicode operation takes a sequence of Unicode code points
413
 * that make up one domain label and returns a sequence of Unicode
414
 * code points. If the input sequence is a label in ACE form, then the
415
 * result is an equivalent internationalized label that is not in ACE
416
 * form, otherwise the original sequence is returned unaltered.
417
 *
418
 * ToUnicode never fails. If any step fails, then the original input
419
 * sequence is returned immediately in that step.
420
 *
421
 * The Punycode decoder can never output more code points than it
422
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
423
 * the number of octets needed to represent a sequence of code points
424
 * depends on the particular character encoding used.
425
 *
426
 * The inputs to ToUnicode are a sequence of code points, the
427
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
428
 * ToUnicode is always a sequence of Unicode code points.
429
 *
430
 * Return value: Returns #Idna_rc error condition, but it must only be
431
 *   used for debugging purposes.  The output buffer is always
432
 *   guaranteed to contain the correct data according to the
433
 *   specification (sans malloc induced errors).  NB!  This means that
434
 *   you normally ignore the return code from this function, as
435
 *   checking it means breaking the standard.
436
 */
437
int
438
idna_to_unicode_44i (const uint32_t *in, size_t inlen,
439
         uint32_t *out, size_t *outlen, int flags)
440
0
{
441
0
  int rc;
442
0
  size_t outlensave = *outlen;
443
0
  char *p;
444
445
0
  p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
446
0
  if (p == NULL)
447
0
    return IDNA_MALLOC_ERROR;
448
449
0
  rc = idna_to_unicode_internal (p, out, outlen, flags);
450
0
  if (rc != IDNA_SUCCESS)
451
0
    {
452
0
      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
453
0
           inlen : outlensave));
454
0
      *outlen = inlen;
455
0
    }
456
457
  /* p is freed in idna_to_unicode_internal.  */
458
459
0
  return rc;
460
0
}
461
462
/* Wrappers that handle several labels */
463
464
/**
465
 * idna_to_ascii_4z:
466
 * @input: zero terminated input Unicode string.
467
 * @output: pointer to newly allocated output string.
468
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
469
 *   %IDNA_USE_STD3_ASCII_RULES.
470
 *
471
 * Convert UCS-4 domain name to ASCII string.  The domain name may
472
 * contain several labels, separated by dots.  The output buffer must
473
 * be deallocated by the caller.
474
 *
475
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
476
 **/
477
int
478
idna_to_ascii_4z (const uint32_t *input, char **output, int flags)
479
5.41k
{
480
5.41k
  const uint32_t *start = input;
481
5.41k
  const uint32_t *end;
482
5.41k
  char buf[64];
483
5.41k
  char *out = NULL;
484
5.41k
  int rc;
485
486
  /* 1) Whenever dots are used as label separators, the following
487
     characters MUST be recognized as dots: U+002E (full stop),
488
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
489
     U+FF61 (halfwidth ideographic full stop). */
490
491
5.41k
  if (input[0] == 0)
492
154
    {
493
      /* Handle implicit zero-length root label. */
494
154
      *output = malloc (1);
495
154
      if (!*output)
496
0
  return IDNA_MALLOC_ERROR;
497
154
      strcpy (*output, "");
498
154
      return IDNA_SUCCESS;
499
154
    }
500
501
5.26k
  if (DOTP (input[0]) && input[1] == 0)
502
40
    {
503
      /* Handle explicit zero-length root label. */
504
40
      *output = malloc (2);
505
40
      if (!*output)
506
0
  return IDNA_MALLOC_ERROR;
507
40
      strcpy (*output, ".");
508
40
      return IDNA_SUCCESS;
509
40
    }
510
511
5.22k
  *output = NULL;
512
5.22k
  do
513
9.04k
    {
514
9.04k
      end = start;
515
516
72.5k
      for (; *end && !DOTP (*end); end++)
517
63.5k
  ;
518
519
9.04k
      if (*end == '\0' && start == end)
520
43
  {
521
    /* Handle explicit zero-length root label. */
522
43
    buf[0] = '\0';
523
43
  }
524
8.99k
      else
525
8.99k
  {
526
8.99k
    rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
527
8.99k
    if (rc != IDNA_SUCCESS)
528
3.11k
      {
529
3.11k
        free (out);
530
3.11k
        return rc;
531
3.11k
      }
532
8.99k
  }
533
534
5.92k
      if (out)
535
3.67k
  {
536
3.67k
    size_t l = strlen (out) + 1 + strlen (buf) + 1;
537
3.67k
    char *newp = realloc (out, l);
538
3.67k
    if (!newp)
539
0
      {
540
0
        free (out);
541
0
        return IDNA_MALLOC_ERROR;
542
0
      }
543
3.67k
    out = newp;
544
3.67k
    strcat (out, ".");
545
3.67k
    strcat (out, buf);
546
3.67k
  }
547
2.25k
      else
548
2.25k
  {
549
2.25k
    out = strdup (buf);
550
2.25k
    if (!out)
551
0
      return IDNA_MALLOC_ERROR;
552
2.25k
  }
553
554
5.92k
      start = end + 1;
555
5.92k
    }
556
5.92k
  while (*end);
557
558
2.10k
  *output = out;
559
560
2.10k
  return IDNA_SUCCESS;
561
5.22k
}
562
563
/**
564
 * idna_to_ascii_8z:
565
 * @input: zero terminated input UTF-8 string.
566
 * @output: pointer to newly allocated output string.
567
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
568
 *   %IDNA_USE_STD3_ASCII_RULES.
569
 *
570
 * Convert UTF-8 domain name to ASCII string.  The domain name may
571
 * contain several labels, separated by dots.  The output buffer must
572
 * be deallocated by the caller.
573
 *
574
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
575
 **/
576
int
577
idna_to_ascii_8z (const char *input, char **output, int flags)
578
4.42k
{
579
4.42k
  uint32_t *ucs4;
580
4.42k
  size_t ucs4len;
581
4.42k
  int rc;
582
583
4.42k
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
584
4.42k
  if (!ucs4)
585
656
    return IDNA_ICONV_ERROR;
586
587
3.77k
  rc = idna_to_ascii_4z (ucs4, output, flags);
588
589
3.77k
  free (ucs4);
590
591
3.77k
  return rc;
592
593
4.42k
}
594
595
/**
596
 * idna_to_ascii_lz:
597
 * @input: zero terminated input string encoded in the current locale's
598
 *   character set.
599
 * @output: pointer to newly allocated output string.
600
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
601
 *   %IDNA_USE_STD3_ASCII_RULES.
602
 *
603
 * Convert domain name in the locale's encoding to ASCII string.  The
604
 * domain name may contain several labels, separated by dots.  The
605
 * output buffer must be deallocated by the caller.
606
 *
607
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
608
 **/
609
int
610
idna_to_ascii_lz (const char *input, char **output, int flags)
611
3.88k
{
612
3.88k
  char *utf8;
613
3.88k
  int rc;
614
615
3.88k
  utf8 = stringprep_locale_to_utf8 (input);
616
3.88k
  if (!utf8)
617
3.33k
    return IDNA_ICONV_ERROR;
618
619
546
  rc = idna_to_ascii_8z (utf8, output, flags);
620
621
546
  free (utf8);
622
623
546
  return rc;
624
3.88k
}
625
626
/**
627
 * idna_to_unicode_4z4z:
628
 * @input: zero-terminated Unicode string.
629
 * @output: pointer to newly allocated output Unicode string.
630
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
631
 *   %IDNA_USE_STD3_ASCII_RULES.
632
 *
633
 * Convert possibly ACE encoded domain name in UCS-4 format into a
634
 * UCS-4 string.  The domain name may contain several labels,
635
 * separated by dots.  The output buffer must be deallocated by the
636
 * caller.
637
 *
638
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
639
 **/
640
int
641
idna_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
642
0
{
643
0
  const uint32_t *start = input;
644
0
  const uint32_t *end;
645
0
  uint32_t *buf;
646
0
  size_t buflen;
647
0
  uint32_t *out = NULL;
648
0
  size_t outlen = 0;
649
0
  int rc;
650
651
0
  *output = NULL;
652
653
0
  do
654
0
    {
655
0
      end = start;
656
657
0
      for (; *end && !DOTP (*end); end++)
658
0
  ;
659
660
0
      buflen = (size_t) (end - start);
661
0
      buf = malloc (sizeof (buf[0]) * (buflen + 1));
662
0
      if (!buf)
663
0
  {
664
0
    free (out);
665
0
    return IDNA_MALLOC_ERROR;
666
0
  }
667
668
      /* don't check for non-malloc return codes as per
669
         specification! */
670
0
      rc = idna_to_unicode_44i (start, (size_t) (end - start),
671
0
        buf, &buflen, flags);
672
0
      if (rc == IDNA_MALLOC_ERROR)
673
0
  {
674
0
    free (out);
675
0
    return IDNA_MALLOC_ERROR;
676
0
  }
677
678
0
      if (out)
679
0
  {
680
0
    uint32_t *newp = realloc (out,
681
0
            sizeof (out[0])
682
0
            * (outlen + 1 + buflen + 1));
683
0
    if (!newp)
684
0
      {
685
0
        free (buf);
686
0
        free (out);
687
0
        return IDNA_MALLOC_ERROR;
688
0
      }
689
0
    out = newp;
690
0
    out[outlen++] = 0x002E; /* '.' (full stop) */
691
0
    memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
692
0
    outlen += buflen;
693
0
    out[outlen] = 0x0;
694
0
    free (buf);
695
0
  }
696
0
      else
697
0
  {
698
0
    out = buf;
699
0
    outlen = buflen;
700
0
    out[outlen] = 0x0;
701
0
  }
702
703
0
      start = end + 1;
704
0
    }
705
0
  while (*end);
706
707
0
  *output = out;
708
709
0
  return IDNA_SUCCESS;
710
0
}
711
712
/**
713
 * idna_to_unicode_8z4z:
714
 * @input: zero-terminated UTF-8 string.
715
 * @output: pointer to newly allocated output Unicode string.
716
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
717
 *   %IDNA_USE_STD3_ASCII_RULES.
718
 *
719
 * Convert possibly ACE encoded domain name in UTF-8 format into a
720
 * UCS-4 string.  The domain name may contain several labels,
721
 * separated by dots.  The output buffer must be deallocated by the
722
 * caller.
723
 *
724
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
725
 **/
726
int
727
idna_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
728
0
{
729
0
  uint32_t *ucs4;
730
0
  size_t ucs4len;
731
0
  int rc;
732
733
0
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
734
0
  if (!ucs4)
735
0
    return IDNA_ICONV_ERROR;
736
737
0
  rc = idna_to_unicode_4z4z (ucs4, output, flags);
738
0
  free (ucs4);
739
740
0
  return rc;
741
0
}
742
743
/**
744
 * idna_to_unicode_8z8z:
745
 * @input: zero-terminated UTF-8 string.
746
 * @output: pointer to newly allocated output UTF-8 string.
747
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
748
 *   %IDNA_USE_STD3_ASCII_RULES.
749
 *
750
 * Convert possibly ACE encoded domain name in UTF-8 format into a
751
 * UTF-8 string.  The domain name may contain several labels,
752
 * separated by dots.  The output buffer must be deallocated by the
753
 * caller.
754
 *
755
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
756
 **/
757
int
758
idna_to_unicode_8z8z (const char *input, char **output, int flags)
759
0
{
760
0
  uint32_t *ucs4;
761
0
  int rc;
762
763
0
  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
764
0
  if (rc != IDNA_SUCCESS)
765
0
    return rc;
766
767
0
  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
768
0
  free (ucs4);
769
770
0
  if (!*output)
771
0
    return IDNA_ICONV_ERROR;
772
773
0
  return IDNA_SUCCESS;
774
0
}
775
776
/**
777
 * idna_to_unicode_8zlz:
778
 * @input: zero-terminated UTF-8 string.
779
 * @output: pointer to newly allocated output string encoded in the
780
 *   current locale's character set.
781
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
782
 *   %IDNA_USE_STD3_ASCII_RULES.
783
 *
784
 * Convert possibly ACE encoded domain name in UTF-8 format into a
785
 * string encoded in the current locale's character set.  The domain
786
 * name may contain several labels, separated by dots.  The output
787
 * buffer must be deallocated by the caller.
788
 *
789
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
790
 **/
791
int
792
idna_to_unicode_8zlz (const char *input, char **output, int flags)
793
0
{
794
0
  char *utf8;
795
0
  int rc;
796
797
0
  rc = idna_to_unicode_8z8z (input, &utf8, flags);
798
0
  if (rc != IDNA_SUCCESS)
799
0
    return rc;
800
801
0
  *output = stringprep_utf8_to_locale (utf8);
802
0
  free (utf8);
803
804
0
  if (!*output)
805
0
    return IDNA_ICONV_ERROR;
806
807
0
  return IDNA_SUCCESS;
808
0
}
809
810
/**
811
 * idna_to_unicode_lzlz:
812
 * @input: zero-terminated string encoded in the current locale's
813
 *   character set.
814
 * @output: pointer to newly allocated output string encoded in the
815
 *   current locale's character set.
816
 * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
817
 *   %IDNA_USE_STD3_ASCII_RULES.
818
 *
819
 * Convert possibly ACE encoded domain name in the locale's character
820
 * set into a string encoded in the current locale's character set.
821
 * The domain name may contain several labels, separated by dots.  The
822
 * output buffer must be deallocated by the caller.
823
 *
824
 * Return value: Returns %IDNA_SUCCESS on success, or error code.
825
 **/
826
int
827
idna_to_unicode_lzlz (const char *input, char **output, int flags)
828
0
{
829
0
  char *utf8;
830
0
  int rc;
831
832
0
  utf8 = stringprep_locale_to_utf8 (input);
833
0
  if (!utf8)
834
0
    return IDNA_ICONV_ERROR;
835
836
0
  rc = idna_to_unicode_8zlz (utf8, output, flags);
837
0
  free (utf8);
838
839
0
  return rc;
840
0
}
841
842
/**
843
 * IDNA_ACE_PREFIX
844
 *
845
 * The IANA allocated prefix to use for IDNA. "xn--"
846
 */
847
848
/**
849
 * Idna_rc:
850
 * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
851
 *   always be zero, the remaining ones are only guaranteed to hold
852
 *   non-zero values, for logical comparison purposes.
853
 * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
854
 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
855
 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
856
 *   the string contains non-LDH ASCII characters.
857
 * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
858
 *   with typo in earlier versions.
859
 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
860
 *   the string contains a leading or trailing hyphen-minus (U+002D).
861
 * @IDNA_INVALID_LENGTH: The final output string is not within the
862
 *   (inclusive) range 1 to 63 characters.
863
 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
864
 *   (for ToUnicode).
865
 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
866
 *   string does not equal the input.
867
 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
868
 *   ToASCII).
869
 * @IDNA_ICONV_ERROR: Character encoding conversion error.
870
 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
871
 *   fatal error).
872
 * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
873
 *   internally in libc).
874
 *
875
 * Enumerated return codes of idna_to_ascii_4i(),
876
 * idna_to_unicode_44i() functions (and functions derived from those
877
 * functions).  The value 0 is guaranteed to always correspond to
878
 * success.
879
 */
880
881
882
/**
883
 * Idna_flags:
884
 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
885
 *   Unicode code points.
886
 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
887
 *   rules (i.e., normal host name rules).
888
 *
889
 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
890
 */