Coverage Report

Created: 2025-12-14 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/decode.c
Line
Count
Source
1
/* decode.c - implementation of IDNA2008 decoding functions
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include "idn2.h"
32
33
#include <errno.h>    /* errno */
34
#include <stdlib.h>   /* malloc, free */
35
36
#include <unitypes.h>
37
#include <uniconv.h>    /* u8_strconv_from_locale */
38
#include <unistr.h>   /* u8_to_u32, u32_cpy, ... */
39
40
/**
41
 * idn2_to_unicode_8z4z:
42
 * @input: Input zero-terminated UTF-8 string.
43
 * @output: Newly allocated UTF-32/UCS-4 output string.
44
 * @flags: Currently unused.
45
 *
46
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
47
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
48
 * and must be deallocated by the caller.
49
 *
50
 * @output may be NULL to test lookup of @input without allocating memory.
51
 *
52
 * Returns:
53
 *   %IDN2_OK: The conversion was successful.
54
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
55
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
56
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
57
 *   %IDN2_MALLOC: Memory allocation failed.
58
 *
59
 * Since: 2.0.0
60
 **/
61
int
62
idn2_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
63
951
{
64
951
  (void) flags;
65
951
  uint32_t *domain_u32;
66
951
  int rc;
67
68
951
  if (!input)
69
0
    {
70
0
      if (output)
71
0
  *output = NULL;
72
0
      return IDN2_OK;
73
0
    }
74
75
  /* split into labels and check */
76
951
  uint32_t out_u32[IDN2_DOMAIN_MAX_LENGTH + 1];
77
951
  size_t out_len = 0;
78
951
  const char *e, *s;
79
80
4.27k
  for (e = s = input; *e; s = e)
81
3.60k
    {
82
3.60k
      uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
83
3.60k
      size_t label_len = IDN2_LABEL_MAX_LENGTH;
84
85
55.8k
      while (*e && *e != '.')
86
52.2k
  e++;
87
88
3.60k
      if (e - s >= 4 && (s[0] == 'x' || s[0] == 'X')
89
1.67k
    && (s[1] == 'n' || s[1] == 'N') && s[2] == '-' && s[3] == '-')
90
1.06k
  {
91
1.06k
    s += 4;
92
93
1.06k
    rc = idn2_punycode_decode ((char *) s, e - s,
94
1.06k
             label_u32, &label_len);
95
1.06k
    if (rc)
96
238
      return rc;
97
98
825
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
99
2
      return IDN2_TOO_BIG_DOMAIN;
100
101
823
    u32_cpy (out_u32 + out_len, label_u32, label_len);
102
823
  }
103
2.53k
      else
104
2.53k
  {
105
    /* convert UTF-8 input to UTF-32 */
106
2.53k
    if (!
107
2.53k
        (domain_u32 =
108
2.53k
         u8_to_u32 ((uint8_t *) s, e - s, NULL, &label_len)))
109
0
      {
110
0
        if (errno == ENOMEM)
111
0
    return IDN2_MALLOC;
112
0
        return IDN2_ENCODING_ERROR;
113
0
      }
114
115
2.53k
    if (label_len > IDN2_LABEL_MAX_LENGTH)
116
36
      {
117
36
        free (domain_u32);
118
36
        return IDN2_TOO_BIG_LABEL;
119
36
      }
120
121
2.50k
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
122
3
      {
123
3
        free (domain_u32);
124
3
        return IDN2_TOO_BIG_DOMAIN;
125
3
      }
126
127
2.50k
    u32_cpy (out_u32 + out_len, domain_u32, label_len);
128
2.50k
    free (domain_u32);
129
2.50k
  }
130
131
3.32k
      out_len += label_len;
132
3.32k
      if (*e)
133
2.73k
  {
134
2.73k
    out_u32[out_len++] = '.';
135
2.73k
    e++;
136
2.73k
  }
137
3.32k
    }
138
139
672
  if (output)
140
672
    {
141
672
      uint32_t *_out;
142
143
672
      out_u32[out_len] = 0;
144
145
672
      _out = u32_cpy_alloc (out_u32, out_len + 1);
146
672
      if (!_out)
147
0
  {
148
0
    if (errno == ENOMEM)
149
0
      return IDN2_MALLOC;
150
0
    return IDN2_ENCODING_ERROR;
151
0
  }
152
153
672
      *output = _out;
154
672
    }
155
156
672
  return IDN2_OK;
157
672
}
158
159
/**
160
 * idn2_to_unicode_4z4z:
161
 * @input: Input zero-terminated UTF-32 string.
162
 * @output: Newly allocated UTF-32 output string.
163
 * @flags: Currently unused.
164
 *
165
 * Converts a possibly ACE encoded domain name in UTF-32 format into a
166
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
167
 * and must be deallocated by the caller.
168
 *
169
 * @output may be NULL to test lookup of @input without allocating memory.
170
 *
171
 * Returns:
172
 *   %IDN2_OK: The conversion was successful.
173
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
174
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
175
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
176
 *   %IDN2_MALLOC: Memory allocation failed.
177
 *
178
 * Since: 2.0.0
179
 **/
180
int
181
idn2_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
182
461
{
183
461
  uint8_t *input_u8;
184
461
  uint32_t *output_u32;
185
461
  size_t length;
186
461
  int rc;
187
188
461
  if (!input)
189
0
    {
190
0
      if (output)
191
0
  *output = NULL;
192
0
      return IDN2_OK;
193
0
    }
194
195
461
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
196
461
  if (!input_u8)
197
270
    {
198
270
      if (errno == ENOMEM)
199
0
  return IDN2_MALLOC;
200
270
      return IDN2_ENCODING_ERROR;
201
270
    }
202
203
191
  rc = idn2_to_unicode_8z4z ((char *) input_u8, &output_u32, flags);
204
191
  free (input_u8);
205
206
191
  if (rc == IDN2_OK)
207
166
    {
208
166
      if (output)
209
166
  *output = output_u32;
210
0
      else
211
166
  free (output_u32);
212
166
    }
213
214
191
  return rc;
215
461
}
216
217
/**
218
 * idn2_to_unicode_44i:
219
 * @in: Input array with UTF-32 code points.
220
 * @inlen: number of code points of input array
221
 * @out: output array with UTF-32 code points.
222
 * @outlen: on input, maximum size of output array with UTF-32 code points,
223
 *          on exit, actual size of output array with UTF-32 code points.
224
 * @flags: Currently unused.
225
 *
226
 * The ToUnicode operation takes a sequence of UTF-32 code points
227
 * that make up one domain label and returns a sequence of UTF-32
228
 * code points. If the input sequence is a label in ACE form, then the
229
 * result is an equivalent internationalized label that is not in ACE
230
 * form, otherwise the original sequence is returned unaltered.
231
 *
232
 * @output may be NULL to test lookup of @input without allocating memory.
233
 *
234
 * Returns:
235
 *   %IDN2_OK: The conversion was successful.
236
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
237
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
238
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
239
 *   %IDN2_MALLOC: Memory allocation failed.
240
 *
241
 * Since: 2.0.0
242
 **/
243
int
244
idn2_to_unicode_44i (const uint32_t *in, size_t inlen, uint32_t *out,
245
         size_t *outlen, int flags)
246
461
{
247
461
  uint32_t *input_u32;
248
461
  uint32_t *output_u32;
249
461
  size_t len;
250
461
  int rc;
251
252
461
  if (!in)
253
0
    {
254
0
      if (outlen)
255
0
  *outlen = 0;
256
0
      return IDN2_OK;
257
0
    }
258
259
461
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
260
461
  if (!input_u32)
261
0
    return IDN2_MALLOC;
262
263
461
  u32_cpy (input_u32, in, inlen);
264
461
  input_u32[inlen] = 0;
265
266
461
  rc = idn2_to_unicode_4z4z (input_u32, &output_u32, flags);
267
461
  free (input_u32);
268
461
  if (rc != IDN2_OK)
269
295
    return rc;
270
271
166
  len = u32_strlen (output_u32);
272
166
  if (out && outlen)
273
166
    u32_cpy (out, output_u32, len < *outlen ? len : *outlen);
274
166
  free (output_u32);
275
276
166
  if (outlen)
277
166
    *outlen = len;
278
279
166
  return IDN2_OK;
280
461
}
281
282
/**
283
 * idn2_to_unicode_8z8z:
284
 * @input: Input zero-terminated UTF-8 string.
285
 * @output: Newly allocated UTF-8 output string.
286
 * @flags: Currently unused.
287
 *
288
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
289
 * UTF-8 string (punycode decoding). The output buffer will be zero-terminated
290
 * and must be deallocated by the caller.
291
 *
292
 * @output may be NULL to test lookup of @input without allocating memory.
293
 *
294
 * Returns:
295
 *   %IDN2_OK: The conversion was successful.
296
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
297
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
298
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
299
 *   %IDN2_MALLOC: Memory allocation failed.
300
 *
301
 * Since: 2.0.0
302
 **/
303
int
304
idn2_to_unicode_8z8z (const char *input, char **output, int flags)
305
760
{
306
760
  uint32_t *output_u32;
307
760
  uint8_t *output_u8;
308
760
  size_t length;
309
760
  int rc;
310
311
760
  rc = idn2_to_unicode_8z4z (input, &output_u32, flags);
312
760
  if (rc != IDN2_OK || !input)
313
254
    return rc;
314
315
506
  output_u8 =
316
506
    u32_to_u8 (output_u32, u32_strlen (output_u32) + 1, NULL, &length);
317
506
  free (output_u32);
318
319
506
  if (!output_u8)
320
3
    {
321
3
      if (errno == ENOMEM)
322
0
  return IDN2_MALLOC;
323
3
      return IDN2_ENCODING_ERROR;
324
3
    }
325
326
503
  if (output)
327
503
    *output = (char *) output_u8;
328
0
  else
329
503
    free (output_u8);
330
331
503
  return IDN2_OK;
332
506
}
333
334
/**
335
 * idn2_to_unicode_8zlz:
336
 * @input: Input zero-terminated UTF-8 string.
337
 * @output: Newly allocated output string in current locale's character set.
338
 * @flags: Currently unused.
339
 *
340
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
341
 * string encoded in the current locale's character set (punycode
342
 * decoding). The output buffer will be zero-terminated and must be
343
 * deallocated by the caller.
344
 *
345
 * @output may be NULL to test lookup of @input without allocating memory.
346
 *
347
 * Returns:
348
 *   %IDN2_OK: The conversion was successful.
349
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
350
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
351
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
352
 *   %IDN2_MALLOC: Memory allocation failed.
353
 *
354
 * Since: 2.0.0
355
 **/
356
int
357
idn2_to_unicode_8zlz (const char *input, char **output, int flags)
358
760
{
359
760
  int rc;
360
760
  uint8_t *output_u8, *output_l8;
361
760
  const char *encoding;
362
363
760
  rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
364
760
  if (rc != IDN2_OK || !input)
365
257
    return rc;
366
367
503
  encoding = locale_charset ();
368
503
  output_l8 =
369
503
    (uint8_t *) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
370
371
503
  if (!output_l8)
372
225
    {
373
225
      if (errno == ENOMEM)
374
0
  rc = IDN2_MALLOC;
375
225
      else
376
225
  rc = IDN2_ENCODING_ERROR;
377
378
225
      free (output_l8);
379
225
    }
380
278
  else
381
278
    {
382
278
      if (output)
383
278
  *output = (char *) output_l8;
384
0
      else
385
278
  free (output_l8);
386
387
278
      rc = IDN2_OK;
388
278
    }
389
390
503
  free (output_u8);
391
392
503
  return rc;
393
760
}
394
395
/**
396
 * idn2_to_unicode_lzlz:
397
 * @input: Input zero-terminated string encoded in the current locale's character set.
398
 * @output: Newly allocated output string in current locale's character set.
399
 * @flags: Currently unused.
400
 *
401
 * Converts a possibly ACE encoded domain name in the locale's character
402
 * set into a string encoded in the current locale's character set (punycode
403
 * decoding). The output buffer will be zero-terminated and must be
404
 * deallocated by the caller.
405
 *
406
 * @output may be NULL to test lookup of @input without allocating memory.
407
 *
408
 * Returns:
409
 *   %IDN2_OK: The conversion was successful.
410
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
411
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
412
 *   %IDN2_ENCODING_ERROR: Output character conversion failed.
413
 *   %IDN2_ICONV_FAIL: Input character conversion failed.
414
 *   %IDN2_MALLOC: Memory allocation failed.
415
 *
416
 * Since: 2.0.0
417
 **/
418
int
419
idn2_to_unicode_lzlz (const char *input, char **output, int flags)
420
896
{
421
896
  uint8_t *input_l8;
422
896
  const char *encoding;
423
896
  int rc;
424
425
896
  if (!input)
426
0
    {
427
0
      if (output)
428
0
  *output = NULL;
429
0
      return IDN2_OK;
430
0
    }
431
432
896
  encoding = locale_charset ();
433
896
  input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
434
435
896
  if (!input_l8)
436
136
    {
437
136
      if (errno == ENOMEM)
438
0
  return IDN2_MALLOC;
439
136
      return IDN2_ICONV_FAIL;
440
136
    }
441
442
760
  rc = idn2_to_unicode_8zlz ((char *) input_l8, output, flags);
443
760
  free (input_l8);
444
445
760
  return rc;
446
896
}