Coverage Report

Created: 2026-03-14 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/decode.c
Line
Count
Source
1
/* decode.c - implementation of IDNA2008 decoding functions
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include "idn2.h"
32
33
#include <errno.h>    /* errno */
34
#include <stdlib.h>   /* malloc, free */
35
36
#include <unitypes.h>
37
#include <uniconv.h>    /* u8_strconv_from_locale */
38
#include <unistr.h>   /* u8_to_u32, u32_cpy, ... */
39
40
/**
41
 * idn2_to_unicode_8z4z:
42
 * @input: Input zero-terminated UTF-8 string.
43
 * @output: Newly allocated UTF-32/UCS-4 output string.
44
 * @flags: Currently unused.
45
 *
46
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
47
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
48
 * and must be deallocated by the caller.
49
 *
50
 * @output may be NULL to test lookup of @input without allocating memory.
51
 *
52
 * Returns:
53
 *   %IDN2_OK: The conversion was successful.
54
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
55
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
56
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
57
 *   %IDN2_MALLOC: Memory allocation failed.
58
 *
59
 * Since: 2.0.0
60
 **/
61
int
62
idn2_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
63
979
{
64
979
  (void) flags;
65
979
  uint32_t *domain_u32;
66
979
  int rc;
67
68
979
  if (!input)
69
0
    {
70
0
      if (output)
71
0
  *output = NULL;
72
0
      return IDN2_OK;
73
0
    }
74
75
  /* split into labels and check */
76
979
  uint32_t out_u32[IDN2_DOMAIN_MAX_LENGTH + 1];
77
979
  size_t out_len = 0;
78
979
  const char *e, *s;
79
80
4.41k
  for (e = s = input; *e; s = e)
81
3.72k
    {
82
3.72k
      uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
83
3.72k
      size_t label_len = IDN2_LABEL_MAX_LENGTH;
84
85
54.5k
      while (*e && *e != '.')
86
50.8k
  e++;
87
88
3.72k
      if (e - s >= 4 && (s[0] == 'x' || s[0] == 'X')
89
1.78k
    && (s[1] == 'n' || s[1] == 'N') && s[2] == '-' && s[3] == '-')
90
1.09k
  {
91
1.09k
    s += 4;
92
93
1.09k
    rc = idn2_punycode_decode ((char *) s, e - s,
94
1.09k
             label_u32, &label_len);
95
1.09k
    if (rc)
96
251
      return rc;
97
98
846
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
99
1
      return IDN2_TOO_BIG_DOMAIN;
100
101
845
    u32_cpy (out_u32 + out_len, label_u32, label_len);
102
845
  }
103
2.62k
      else
104
2.62k
  {
105
    /* convert UTF-8 input to UTF-32 */
106
2.62k
    if (!
107
2.62k
        (domain_u32 =
108
2.62k
         u8_to_u32 ((uint8_t *) s, e - s, NULL, &label_len)))
109
0
      {
110
0
        if (errno == ENOMEM)
111
0
    return IDN2_MALLOC;
112
0
        return IDN2_ENCODING_ERROR;
113
0
      }
114
115
2.62k
    if (label_len > IDN2_LABEL_MAX_LENGTH)
116
36
      {
117
36
        free (domain_u32);
118
36
        return IDN2_TOO_BIG_LABEL;
119
36
      }
120
121
2.59k
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
122
2
      {
123
2
        free (domain_u32);
124
2
        return IDN2_TOO_BIG_DOMAIN;
125
2
      }
126
127
2.58k
    u32_cpy (out_u32 + out_len, domain_u32, label_len);
128
2.58k
    free (domain_u32);
129
2.58k
  }
130
131
3.43k
      out_len += label_len;
132
3.43k
      if (*e)
133
2.79k
  {
134
2.79k
    out_u32[out_len++] = '.';
135
2.79k
    e++;
136
2.79k
  }
137
3.43k
    }
138
139
689
  if (output)
140
689
    {
141
689
      uint32_t *_out;
142
143
689
      out_u32[out_len] = 0;
144
145
689
      _out = u32_cpy_alloc (out_u32, out_len + 1);
146
689
      if (!_out)
147
0
  {
148
0
    if (errno == ENOMEM)
149
0
      return IDN2_MALLOC;
150
0
    return IDN2_ENCODING_ERROR;
151
0
  }
152
153
689
      *output = _out;
154
689
    }
155
156
689
  return IDN2_OK;
157
689
}
158
159
/**
160
 * idn2_to_unicode_4z4z:
161
 * @input: Input zero-terminated UTF-32 string.
162
 * @output: Newly allocated UTF-32 output string.
163
 * @flags: Currently unused.
164
 *
165
 * Converts a possibly ACE encoded domain name in UTF-32 format into a
166
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
167
 * and must be deallocated by the caller.
168
 *
169
 * @output may be NULL to test lookup of @input without allocating memory.
170
 *
171
 * Returns:
172
 *   %IDN2_OK: The conversion was successful.
173
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
174
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
175
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
176
 *   %IDN2_MALLOC: Memory allocation failed.
177
 *
178
 * Since: 2.0.0
179
 **/
180
int
181
idn2_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
182
508
{
183
508
  uint8_t *input_u8;
184
508
  uint32_t *output_u32;
185
508
  size_t length;
186
508
  int rc;
187
188
508
  if (!input)
189
0
    {
190
0
      if (output)
191
0
  *output = NULL;
192
0
      return IDN2_OK;
193
0
    }
194
195
508
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
196
508
  if (!input_u8)
197
300
    {
198
300
      if (errno == ENOMEM)
199
0
  return IDN2_MALLOC;
200
300
      return IDN2_ENCODING_ERROR;
201
300
    }
202
203
208
  rc = idn2_to_unicode_8z4z ((char *) input_u8, &output_u32, flags);
204
208
  free (input_u8);
205
206
208
  if (rc == IDN2_OK)
207
182
    {
208
182
      if (output)
209
182
  *output = output_u32;
210
0
      else
211
182
  free (output_u32);
212
182
    }
213
214
208
  return rc;
215
508
}
216
217
/**
218
 * idn2_to_unicode_44i:
219
 * @in: Input array with UTF-32 code points.
220
 * @inlen: number of code points of input array
221
 * @out: output array with UTF-32 code points.
222
 * @outlen: on input, maximum size of output array with UTF-32 code points,
223
 *          on exit, actual size of output array with UTF-32 code points.
224
 * @flags: Currently unused.
225
 *
226
 * The ToUnicode operation takes a sequence of UTF-32 code points
227
 * that make up one domain label and returns a sequence of UTF-32
228
 * code points. If the input sequence is a label in ACE form, then the
229
 * result is an equivalent internationalized label that is not in ACE
230
 * form, otherwise the original sequence is returned unaltered.
231
 *
232
 * @output may be NULL to test lookup of @input without allocating memory.
233
 *
234
 * Returns:
235
 *   %IDN2_OK: The conversion was successful.
236
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
237
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
238
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
239
 *   %IDN2_MALLOC: Memory allocation failed.
240
 *
241
 * Since: 2.0.0
242
 **/
243
int
244
idn2_to_unicode_44i (const uint32_t *in, size_t inlen, uint32_t *out,
245
         size_t *outlen, int flags)
246
508
{
247
508
  uint32_t *input_u32;
248
508
  uint32_t *output_u32;
249
508
  size_t len;
250
508
  int rc;
251
252
508
  if (!in)
253
0
    {
254
0
      if (outlen)
255
0
  *outlen = 0;
256
0
      return IDN2_OK;
257
0
    }
258
259
508
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
260
508
  if (!input_u32)
261
0
    return IDN2_MALLOC;
262
263
508
  u32_cpy (input_u32, in, inlen);
264
508
  input_u32[inlen] = 0;
265
266
508
  rc = idn2_to_unicode_4z4z (input_u32, &output_u32, flags);
267
508
  free (input_u32);
268
508
  if (rc != IDN2_OK)
269
326
    return rc;
270
271
182
  len = u32_strlen (output_u32);
272
182
  if (out && outlen)
273
182
    u32_cpy (out, output_u32, len < *outlen ? len : *outlen);
274
182
  free (output_u32);
275
276
182
  if (outlen)
277
182
    *outlen = len;
278
279
182
  return IDN2_OK;
280
508
}
281
282
/**
283
 * idn2_to_unicode_8z8z:
284
 * @input: Input zero-terminated UTF-8 string.
285
 * @output: Newly allocated UTF-8 output string.
286
 * @flags: Currently unused.
287
 *
288
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
289
 * UTF-8 string (punycode decoding). The output buffer will be zero-terminated
290
 * and must be deallocated by the caller.
291
 *
292
 * @output may be NULL to test lookup of @input without allocating memory.
293
 *
294
 * Returns:
295
 *   %IDN2_OK: The conversion was successful.
296
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
297
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
298
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
299
 *   %IDN2_MALLOC: Memory allocation failed.
300
 *
301
 * Since: 2.0.0
302
 **/
303
int
304
idn2_to_unicode_8z8z (const char *input, char **output, int flags)
305
771
{
306
771
  uint32_t *output_u32;
307
771
  uint8_t *output_u8;
308
771
  size_t length;
309
771
  int rc;
310
311
771
  rc = idn2_to_unicode_8z4z (input, &output_u32, flags);
312
771
  if (rc != IDN2_OK || !input)
313
264
    return rc;
314
315
507
  output_u8 =
316
507
    u32_to_u8 (output_u32, u32_strlen (output_u32) + 1, NULL, &length);
317
507
  free (output_u32);
318
319
507
  if (!output_u8)
320
8
    {
321
8
      if (errno == ENOMEM)
322
0
  return IDN2_MALLOC;
323
8
      return IDN2_ENCODING_ERROR;
324
8
    }
325
326
499
  if (output)
327
499
    *output = (char *) output_u8;
328
0
  else
329
499
    free (output_u8);
330
331
499
  return IDN2_OK;
332
507
}
333
334
/**
335
 * idn2_to_unicode_8zlz:
336
 * @input: Input zero-terminated UTF-8 string.
337
 * @output: Newly allocated output string in current locale's character set.
338
 * @flags: Currently unused.
339
 *
340
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
341
 * string encoded in the current locale's character set (punycode
342
 * decoding). The output buffer will be zero-terminated and must be
343
 * deallocated by the caller.
344
 *
345
 * @output may be NULL to test lookup of @input without allocating memory.
346
 *
347
 * Returns:
348
 *   %IDN2_OK: The conversion was successful.
349
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
350
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
351
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
352
 *   %IDN2_MALLOC: Memory allocation failed.
353
 *
354
 * Since: 2.0.0
355
 **/
356
int
357
idn2_to_unicode_8zlz (const char *input, char **output, int flags)
358
771
{
359
771
  int rc;
360
771
  uint8_t *output_u8, *output_l8;
361
771
  const char *encoding;
362
363
771
  rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
364
771
  if (rc != IDN2_OK || !input)
365
272
    return rc;
366
367
499
  encoding = locale_charset ();
368
499
  output_l8 =
369
499
    (uint8_t *) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
370
371
499
  if (!output_l8)
372
234
    {
373
234
      if (errno == ENOMEM)
374
0
  rc = IDN2_MALLOC;
375
234
      else
376
234
  rc = IDN2_ENCODING_ERROR;
377
378
234
      free (output_l8);
379
234
    }
380
265
  else
381
265
    {
382
265
      if (output)
383
265
  *output = (char *) output_l8;
384
0
      else
385
265
  free (output_l8);
386
387
265
      rc = IDN2_OK;
388
265
    }
389
390
499
  free (output_u8);
391
392
499
  return rc;
393
771
}
394
395
/**
396
 * idn2_to_unicode_lzlz:
397
 * @input: Input zero-terminated string encoded in the current locale's character set.
398
 * @output: Newly allocated output string in current locale's character set.
399
 * @flags: Currently unused.
400
 *
401
 * Converts a possibly ACE encoded domain name in the locale's character
402
 * set into a string encoded in the current locale's character set (punycode
403
 * decoding). The output buffer will be zero-terminated and must be
404
 * deallocated by the caller.
405
 *
406
 * @output may be NULL to test lookup of @input without allocating memory.
407
 *
408
 * Returns:
409
 *   %IDN2_OK: The conversion was successful.
410
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
411
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
412
 *   %IDN2_ENCODING_ERROR: Output character conversion failed.
413
 *   %IDN2_ICONV_FAIL: Input character conversion failed.
414
 *   %IDN2_MALLOC: Memory allocation failed.
415
 *
416
 * Since: 2.0.0
417
 **/
418
int
419
idn2_to_unicode_lzlz (const char *input, char **output, int flags)
420
954
{
421
954
  uint8_t *input_l8;
422
954
  const char *encoding;
423
954
  int rc;
424
425
954
  if (!input)
426
0
    {
427
0
      if (output)
428
0
  *output = NULL;
429
0
      return IDN2_OK;
430
0
    }
431
432
954
  encoding = locale_charset ();
433
954
  input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
434
435
954
  if (!input_l8)
436
183
    {
437
183
      if (errno == ENOMEM)
438
0
  return IDN2_MALLOC;
439
183
      return IDN2_ICONV_FAIL;
440
183
    }
441
442
771
  rc = idn2_to_unicode_8zlz ((char *) input_l8, output, flags);
443
771
  free (input_l8);
444
445
771
  return rc;
446
954
}