Coverage Report

Created: 2025-03-06 07:58

/src/libidn2/lib/decode.c
Line
Count
Source (jump to first uncovered line)
1
/* decode.c - implementation of IDNA2008 decoding functions
2
   Copyright (C) 2011-2024 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include "idn2.h"
32
33
#include <errno.h>    /* errno */
34
#include <stdlib.h>   /* malloc, free */
35
36
#include <unitypes.h>
37
#include <uniconv.h>    /* u8_strconv_from_locale */
38
#include <unistr.h>   /* u8_to_u32, u32_cpy, ... */
39
40
/**
41
 * idn2_to_unicode_8z4z:
42
 * @input: Input zero-terminated UTF-8 string.
43
 * @output: Newly allocated UTF-32/UCS-4 output string.
44
 * @flags: Currently unused.
45
 *
46
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
47
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
48
 * and must be deallocated by the caller.
49
 *
50
 * @output may be NULL to test lookup of @input without allocating memory.
51
 *
52
 * Returns:
53
 *   %IDN2_OK: The conversion was successful.
54
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
55
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
56
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
57
 *   %IDN2_MALLOC: Memory allocation failed.
58
 *
59
 * Since: 2.0.0
60
 **/
61
int
62
idn2_to_unicode_8z4z (const char *input, uint32_t **output,
63
          G_GNUC_UNUSED int flags)
64
0
{
65
0
  uint32_t *domain_u32;
66
0
  int rc;
67
68
0
  if (!input)
69
0
    {
70
0
      if (output)
71
0
  *output = NULL;
72
0
      return IDN2_OK;
73
0
    }
74
75
  /* split into labels and check */
76
0
  uint32_t out_u32[IDN2_DOMAIN_MAX_LENGTH + 1];
77
0
  size_t out_len = 0;
78
0
  const char *e, *s;
79
80
0
  for (e = s = input; *e; s = e)
81
0
    {
82
0
      uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
83
0
      size_t label_len = IDN2_LABEL_MAX_LENGTH;
84
85
0
      while (*e && *e != '.')
86
0
  e++;
87
88
0
      if (e - s >= 4 && (s[0] == 'x' || s[0] == 'X')
89
0
    && (s[1] == 'n' || s[1] == 'N') && s[2] == '-' && s[3] == '-')
90
0
  {
91
0
    s += 4;
92
93
0
    rc = idn2_punycode_decode ((char *) s, e - s,
94
0
             label_u32, &label_len);
95
0
    if (rc)
96
0
      return rc;
97
98
0
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
99
0
      return IDN2_TOO_BIG_DOMAIN;
100
101
0
    u32_cpy (out_u32 + out_len, label_u32, label_len);
102
0
  }
103
0
      else
104
0
  {
105
    /* convert UTF-8 input to UTF-32 */
106
0
    if (!
107
0
        (domain_u32 =
108
0
         u8_to_u32 ((uint8_t *) s, e - s, NULL, &label_len)))
109
0
      {
110
0
        if (errno == ENOMEM)
111
0
    return IDN2_MALLOC;
112
0
        return IDN2_ENCODING_ERROR;
113
0
      }
114
115
0
    if (label_len > IDN2_LABEL_MAX_LENGTH)
116
0
      {
117
0
        free (domain_u32);
118
0
        return IDN2_TOO_BIG_LABEL;
119
0
      }
120
121
0
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
122
0
      {
123
0
        free (domain_u32);
124
0
        return IDN2_TOO_BIG_DOMAIN;
125
0
      }
126
127
0
    u32_cpy (out_u32 + out_len, domain_u32, label_len);
128
0
    free (domain_u32);
129
0
  }
130
131
0
      out_len += label_len;
132
0
      if (*e)
133
0
  {
134
0
    out_u32[out_len++] = '.';
135
0
    e++;
136
0
  }
137
0
    }
138
139
0
  if (output)
140
0
    {
141
0
      uint32_t *_out;
142
143
0
      out_u32[out_len] = 0;
144
145
0
      _out = u32_cpy_alloc (out_u32, out_len + 1);
146
0
      if (!_out)
147
0
  {
148
0
    if (errno == ENOMEM)
149
0
      return IDN2_MALLOC;
150
0
    return IDN2_ENCODING_ERROR;
151
0
  }
152
153
0
      *output = _out;
154
0
    }
155
156
0
  return IDN2_OK;
157
0
}
158
159
/**
160
 * idn2_to_unicode_4z4z:
161
 * @input: Input zero-terminated UTF-32 string.
162
 * @output: Newly allocated UTF-32 output string.
163
 * @flags: Currently unused.
164
 *
165
 * Converts a possibly ACE encoded domain name in UTF-32 format into a
166
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
167
 * and must be deallocated by the caller.
168
 *
169
 * @output may be NULL to test lookup of @input without allocating memory.
170
 *
171
 * Returns:
172
 *   %IDN2_OK: The conversion was successful.
173
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
174
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
175
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
176
 *   %IDN2_MALLOC: Memory allocation failed.
177
 *
178
 * Since: 2.0.0
179
 **/
180
int
181
idn2_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
182
0
{
183
0
  uint8_t *input_u8;
184
0
  uint32_t *output_u32;
185
0
  size_t length;
186
0
  int rc;
187
188
0
  if (!input)
189
0
    {
190
0
      if (output)
191
0
  *output = NULL;
192
0
      return IDN2_OK;
193
0
    }
194
195
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
196
0
  if (!input_u8)
197
0
    {
198
0
      if (errno == ENOMEM)
199
0
  return IDN2_MALLOC;
200
0
      return IDN2_ENCODING_ERROR;
201
0
    }
202
203
0
  rc = idn2_to_unicode_8z4z ((char *) input_u8, &output_u32, flags);
204
0
  free (input_u8);
205
206
0
  if (rc == IDN2_OK)
207
0
    {
208
0
      if (output)
209
0
  *output = output_u32;
210
0
      else
211
0
  free (output_u32);
212
0
    }
213
214
0
  return rc;
215
0
}
216
217
/**
218
 * idn2_to_unicode_44i:
219
 * @in: Input array with UTF-32 code points.
220
 * @inlen: number of code points of input array
221
 * @out: output array with UTF-32 code points.
222
 * @outlen: on input, maximum size of output array with UTF-32 code points,
223
 *          on exit, actual size of output array with UTF-32 code points.
224
 * @flags: Currently unused.
225
 *
226
 * The ToUnicode operation takes a sequence of UTF-32 code points
227
 * that make up one domain label and returns a sequence of UTF-32
228
 * code points. If the input sequence is a label in ACE form, then the
229
 * result is an equivalent internationalized label that is not in ACE
230
 * form, otherwise the original sequence is returned unaltered.
231
 *
232
 * @output may be NULL to test lookup of @input without allocating memory.
233
 *
234
 * Returns:
235
 *   %IDN2_OK: The conversion was successful.
236
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
237
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
238
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
239
 *   %IDN2_MALLOC: Memory allocation failed.
240
 *
241
 * Since: 2.0.0
242
 **/
243
int
244
idn2_to_unicode_44i (const uint32_t *in, size_t inlen, uint32_t *out,
245
         size_t *outlen, int flags)
246
0
{
247
0
  uint32_t *input_u32;
248
0
  uint32_t *output_u32;
249
0
  size_t len;
250
0
  int rc;
251
252
0
  if (!in)
253
0
    {
254
0
      if (outlen)
255
0
  *outlen = 0;
256
0
      return IDN2_OK;
257
0
    }
258
259
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
260
0
  if (!input_u32)
261
0
    return IDN2_MALLOC;
262
263
0
  u32_cpy (input_u32, in, inlen);
264
0
  input_u32[inlen] = 0;
265
266
0
  rc = idn2_to_unicode_4z4z (input_u32, &output_u32, flags);
267
0
  free (input_u32);
268
0
  if (rc != IDN2_OK)
269
0
    return rc;
270
271
0
  len = u32_strlen (output_u32);
272
0
  if (out && outlen)
273
0
    u32_cpy (out, output_u32, len < *outlen ? len : *outlen);
274
0
  free (output_u32);
275
276
0
  if (outlen)
277
0
    *outlen = len;
278
279
0
  return IDN2_OK;
280
0
}
281
282
/**
283
 * idn2_to_unicode_8z8z:
284
 * @input: Input zero-terminated UTF-8 string.
285
 * @output: Newly allocated UTF-8 output string.
286
 * @flags: Currently unused.
287
 *
288
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
289
 * UTF-8 string (punycode decoding). The output buffer will be zero-terminated
290
 * and must be deallocated by the caller.
291
 *
292
 * @output may be NULL to test lookup of @input without allocating memory.
293
 *
294
 * Returns:
295
 *   %IDN2_OK: The conversion was successful.
296
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
297
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
298
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
299
 *   %IDN2_MALLOC: Memory allocation failed.
300
 *
301
 * Since: 2.0.0
302
 **/
303
int
304
idn2_to_unicode_8z8z (const char *input, char **output, int flags)
305
0
{
306
0
  uint32_t *output_u32;
307
0
  uint8_t *output_u8;
308
0
  size_t length;
309
0
  int rc;
310
311
0
  rc = idn2_to_unicode_8z4z (input, &output_u32, flags);
312
0
  if (rc != IDN2_OK || !input)
313
0
    return rc;
314
315
0
  output_u8 =
316
0
    u32_to_u8 (output_u32, u32_strlen (output_u32) + 1, NULL, &length);
317
0
  free (output_u32);
318
319
0
  if (!output_u8)
320
0
    {
321
0
      if (errno == ENOMEM)
322
0
  return IDN2_MALLOC;
323
0
      return IDN2_ENCODING_ERROR;
324
0
    }
325
326
0
  if (output)
327
0
    *output = (char *) output_u8;
328
0
  else
329
0
    free (output_u8);
330
331
0
  return IDN2_OK;
332
0
}
333
334
/**
335
 * idn2_to_unicode_8zlz:
336
 * @input: Input zero-terminated UTF-8 string.
337
 * @output: Newly allocated output string in current locale's character set.
338
 * @flags: Currently unused.
339
 *
340
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
341
 * string encoded in the current locale's character set (punycode
342
 * decoding). The output buffer will be zero-terminated and must be
343
 * deallocated by the caller.
344
 *
345
 * @output may be NULL to test lookup of @input without allocating memory.
346
 *
347
 * Returns:
348
 *   %IDN2_OK: The conversion was successful.
349
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
350
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
351
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
352
 *   %IDN2_MALLOC: Memory allocation failed.
353
 *
354
 * Since: 2.0.0
355
 **/
356
int
357
idn2_to_unicode_8zlz (const char *input, char **output, int flags)
358
0
{
359
0
  int rc;
360
0
  uint8_t *output_u8, *output_l8;
361
0
  const char *encoding;
362
363
0
  rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
364
0
  if (rc != IDN2_OK || !input)
365
0
    return rc;
366
367
0
  encoding = locale_charset ();
368
0
  output_l8 =
369
0
    (uint8_t *) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
370
371
0
  if (!output_l8)
372
0
    {
373
0
      if (errno == ENOMEM)
374
0
  rc = IDN2_MALLOC;
375
0
      else
376
0
  rc = IDN2_ENCODING_ERROR;
377
378
0
      free (output_l8);
379
0
    }
380
0
  else
381
0
    {
382
0
      if (output)
383
0
  *output = (char *) output_l8;
384
0
      else
385
0
  free (output_l8);
386
387
0
      rc = IDN2_OK;
388
0
    }
389
390
0
  free (output_u8);
391
392
0
  return rc;
393
0
}
394
395
/**
396
 * idn2_to_unicode_lzlz:
397
 * @input: Input zero-terminated string encoded in the current locale's character set.
398
 * @output: Newly allocated output string in current locale's character set.
399
 * @flags: Currently unused.
400
 *
401
 * Converts a possibly ACE encoded domain name in the locale's character
402
 * set into a string encoded in the current locale's character set (punycode
403
 * decoding). The output buffer will be zero-terminated and must be
404
 * deallocated by the caller.
405
 *
406
 * @output may be NULL to test lookup of @input without allocating memory.
407
 *
408
 * Returns:
409
 *   %IDN2_OK: The conversion was successful.
410
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
411
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
412
 *   %IDN2_ENCODING_ERROR: Output character conversion failed.
413
 *   %IDN2_ICONV_FAIL: Input character conversion failed.
414
 *   %IDN2_MALLOC: Memory allocation failed.
415
 *
416
 * Since: 2.0.0
417
 **/
418
int
419
idn2_to_unicode_lzlz (const char *input, char **output, int flags)
420
0
{
421
0
  uint8_t *input_l8;
422
0
  const char *encoding;
423
0
  int rc;
424
425
0
  if (!input)
426
0
    {
427
0
      if (output)
428
0
  *output = NULL;
429
0
      return IDN2_OK;
430
0
    }
431
432
0
  encoding = locale_charset ();
433
0
  input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
434
435
0
  if (!input_l8)
436
0
    {
437
0
      if (errno == ENOMEM)
438
0
  return IDN2_MALLOC;
439
0
      return IDN2_ICONV_FAIL;
440
0
    }
441
442
0
  rc = idn2_to_unicode_8zlz ((char *) input_l8, output, flags);
443
0
  free (input_l8);
444
445
0
  return rc;
446
0
}