Coverage Report

Created: 2023-03-26 08:33

/src/libidn2/lib/decode.c
Line
Count
Source (jump to first uncovered line)
1
/* decode.c - implementation of IDNA2008 decode (ToUnicode) functions
2
   Copyright (C) 2011-2022 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include "idn2.h"
32
33
#include <errno.h>    /* errno */
34
#include <stdlib.h>   /* malloc, free */
35
36
#include "punycode.h"
37
38
#include <unitypes.h>
39
#include <uniconv.h>    /* u8_strconv_from_locale */
40
#include <unistr.h>   /* u8_to_u32, u32_cpy, ... */
41
42
/**
43
 * idn2_to_unicode_8z4z:
44
 * @input: Input zero-terminated UTF-8 string.
45
 * @output: Newly allocated UTF-32/UCS-4 output string.
46
 * @flags: Currently unused.
47
 *
48
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
49
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
50
 * and must be deallocated by the caller.
51
 *
52
 * @output may be NULL to test lookup of @input without allocating memory.
53
 *
54
 * Returns:
55
 *   %IDN2_OK: The conversion was successful.
56
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
57
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
58
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
59
 *   %IDN2_MALLOC: Memory allocation failed.
60
 *
61
 * Since: 2.0.0
62
 **/
63
int
64
idn2_to_unicode_8z4z (const char *input, uint32_t ** output,
65
          G_GNUC_UNUSED int flags)
66
0
{
67
0
  uint32_t *domain_u32;
68
0
  int rc;
69
70
0
  if (!input)
71
0
    {
72
0
      if (output)
73
0
  *output = NULL;
74
0
      return IDN2_OK;
75
0
    }
76
77
  /* split into labels and check */
78
0
  uint32_t out_u32[IDN2_DOMAIN_MAX_LENGTH + 1];
79
0
  size_t out_len = 0;
80
0
  const char *e, *s;
81
82
0
  for (e = s = input; *e; s = e)
83
0
    {
84
0
      uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
85
0
      size_t label_len = IDN2_LABEL_MAX_LENGTH;
86
87
0
      while (*e && *e != '.')
88
0
  e++;
89
90
0
      if (e - s >= 4 && (s[0] == 'x' || s[0] == 'X')
91
0
    && (s[1] == 'n' || s[1] == 'N') && s[2] == '-' && s[3] == '-')
92
0
  {
93
0
    s += 4;
94
95
0
    rc = _idn2_punycode_decode_internal (e - s, (char *) s,
96
0
                 &label_len, label_u32);
97
0
    if (rc)
98
0
      return rc;
99
100
0
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
101
0
      return IDN2_TOO_BIG_DOMAIN;
102
103
0
    u32_cpy (out_u32 + out_len, label_u32, label_len);
104
0
  }
105
0
      else
106
0
  {
107
    /* convert UTF-8 input to UTF-32 */
108
0
    if (!
109
0
        (domain_u32 =
110
0
         u8_to_u32 ((uint8_t *) s, e - s, NULL, &label_len)))
111
0
      {
112
0
        if (errno == ENOMEM)
113
0
    return IDN2_MALLOC;
114
0
        return IDN2_ENCODING_ERROR;
115
0
      }
116
117
0
    if (label_len > IDN2_LABEL_MAX_LENGTH)
118
0
      {
119
0
        free (domain_u32);
120
0
        return IDN2_TOO_BIG_LABEL;
121
0
      }
122
123
0
    if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
124
0
      {
125
0
        free (domain_u32);
126
0
        return IDN2_TOO_BIG_DOMAIN;
127
0
      }
128
129
0
    u32_cpy (out_u32 + out_len, domain_u32, label_len);
130
0
    free (domain_u32);
131
0
  }
132
133
0
      out_len += label_len;
134
0
      if (*e)
135
0
  {
136
0
    out_u32[out_len++] = '.';
137
0
    e++;
138
0
  }
139
0
    }
140
141
0
  if (output)
142
0
    {
143
0
      uint32_t *_out;
144
145
0
      out_u32[out_len] = 0;
146
147
0
      _out = u32_cpy_alloc (out_u32, out_len + 1);
148
0
      if (!_out)
149
0
  {
150
0
    if (errno == ENOMEM)
151
0
      return IDN2_MALLOC;
152
0
    return IDN2_ENCODING_ERROR;
153
0
  }
154
155
0
      *output = _out;
156
0
    }
157
158
0
  return IDN2_OK;
159
0
}
160
161
/**
162
 * idn2_to_unicode_4z4z:
163
 * @input: Input zero-terminated UTF-32 string.
164
 * @output: Newly allocated UTF-32 output string.
165
 * @flags: Currently unused.
166
 *
167
 * Converts a possibly ACE encoded domain name in UTF-32 format into a
168
 * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
169
 * and must be deallocated by the caller.
170
 *
171
 * @output may be NULL to test lookup of @input without allocating memory.
172
 *
173
 * Returns:
174
 *   %IDN2_OK: The conversion was successful.
175
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
176
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
177
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
178
 *   %IDN2_MALLOC: Memory allocation failed.
179
 *
180
 * Since: 2.0.0
181
 **/
182
int
183
idn2_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
184
0
{
185
0
  uint8_t *input_u8;
186
0
  uint32_t *output_u32;
187
0
  size_t length;
188
0
  int rc;
189
190
0
  if (!input)
191
0
    {
192
0
      if (output)
193
0
  *output = NULL;
194
0
      return IDN2_OK;
195
0
    }
196
197
0
  input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
198
0
  if (!input_u8)
199
0
    {
200
0
      if (errno == ENOMEM)
201
0
  return IDN2_MALLOC;
202
0
      return IDN2_ENCODING_ERROR;
203
0
    }
204
205
0
  rc = idn2_to_unicode_8z4z ((char *) input_u8, &output_u32, flags);
206
0
  free (input_u8);
207
208
0
  if (rc == IDN2_OK)
209
0
    {
210
0
      if (output)
211
0
  *output = output_u32;
212
0
      else
213
0
  free (output_u32);
214
0
    }
215
216
0
  return rc;
217
0
}
218
219
/**
220
 * idn2_to_unicode_44i:
221
 * @in: Input array with UTF-32 code points.
222
 * @inlen: number of code points of input array
223
 * @out: output array with UTF-32 code points.
224
 * @outlen: on input, maximum size of output array with UTF-32 code points,
225
 *          on exit, actual size of output array with UTF-32 code points.
226
 * @flags: Currently unused.
227
 *
228
 * The ToUnicode operation takes a sequence of UTF-32 code points
229
 * that make up one domain label and returns a sequence of UTF-32
230
 * code points. If the input sequence is a label in ACE form, then the
231
 * result is an equivalent internationalized label that is not in ACE
232
 * form, otherwise the original sequence is returned unaltered.
233
 *
234
 * @output may be NULL to test lookup of @input without allocating memory.
235
 *
236
 * Returns:
237
 *   %IDN2_OK: The conversion was successful.
238
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
239
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
240
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
241
 *   %IDN2_MALLOC: Memory allocation failed.
242
 *
243
 * Since: 2.0.0
244
 **/
245
int
246
idn2_to_unicode_44i (const uint32_t * in, size_t inlen, uint32_t * out,
247
         size_t *outlen, int flags)
248
0
{
249
0
  uint32_t *input_u32;
250
0
  uint32_t *output_u32;
251
0
  size_t len;
252
0
  int rc;
253
254
0
  if (!in)
255
0
    {
256
0
      if (outlen)
257
0
  *outlen = 0;
258
0
      return IDN2_OK;
259
0
    }
260
261
0
  input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
262
0
  if (!input_u32)
263
0
    return IDN2_MALLOC;
264
265
0
  u32_cpy (input_u32, in, inlen);
266
0
  input_u32[inlen] = 0;
267
268
0
  rc = idn2_to_unicode_4z4z (input_u32, &output_u32, flags);
269
0
  free (input_u32);
270
0
  if (rc != IDN2_OK)
271
0
    return rc;
272
273
0
  len = u32_strlen (output_u32);
274
0
  if (out && outlen)
275
0
    u32_cpy (out, output_u32, len < *outlen ? len : *outlen);
276
0
  free (output_u32);
277
278
0
  if (outlen)
279
0
    *outlen = len;
280
281
0
  return IDN2_OK;
282
0
}
283
284
/**
285
 * idn2_to_unicode_8z8z:
286
 * @input: Input zero-terminated UTF-8 string.
287
 * @output: Newly allocated UTF-8 output string.
288
 * @flags: Currently unused.
289
 *
290
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
291
 * UTF-8 string (punycode decoding). The output buffer will be zero-terminated
292
 * and must be deallocated by the caller.
293
 *
294
 * @output may be NULL to test lookup of @input without allocating memory.
295
 *
296
 * Returns:
297
 *   %IDN2_OK: The conversion was successful.
298
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
299
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
300
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
301
 *   %IDN2_MALLOC: Memory allocation failed.
302
 *
303
 * Since: 2.0.0
304
 **/
305
int
306
idn2_to_unicode_8z8z (const char *input, char **output, int flags)
307
0
{
308
0
  uint32_t *output_u32;
309
0
  uint8_t *output_u8;
310
0
  size_t length;
311
0
  int rc;
312
313
0
  rc = idn2_to_unicode_8z4z (input, &output_u32, flags);
314
0
  if (rc != IDN2_OK || !input)
315
0
    return rc;
316
317
0
  output_u8 =
318
0
    u32_to_u8 (output_u32, u32_strlen (output_u32) + 1, NULL, &length);
319
0
  free (output_u32);
320
321
0
  if (!output_u8)
322
0
    {
323
0
      if (errno == ENOMEM)
324
0
  return IDN2_MALLOC;
325
0
      return IDN2_ENCODING_ERROR;
326
0
    }
327
328
0
  if (output)
329
0
    *output = (char *) output_u8;
330
0
  else
331
0
    free (output_u8);
332
333
0
  return IDN2_OK;
334
0
}
335
336
/**
337
 * idn2_to_unicode_8zlz:
338
 * @input: Input zero-terminated UTF-8 string.
339
 * @output: Newly allocated output string in current locale's character set.
340
 * @flags: Currently unused.
341
 *
342
 * Converts a possibly ACE encoded domain name in UTF-8 format into a
343
 * string encoded in the current locale's character set (punycode
344
 * decoding). The output buffer will be zero-terminated and must be
345
 * deallocated by the caller.
346
 *
347
 * @output may be NULL to test lookup of @input without allocating memory.
348
 *
349
 * Returns:
350
 *   %IDN2_OK: The conversion was successful.
351
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
352
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
353
 *   %IDN2_ENCODING_ERROR: Character conversion failed.
354
 *   %IDN2_MALLOC: Memory allocation failed.
355
 *
356
 * Since: 2.0.0
357
 **/
358
int
359
idn2_to_unicode_8zlz (const char *input, char **output, int flags)
360
0
{
361
0
  int rc;
362
0
  uint8_t *output_u8, *output_l8;
363
0
  const char *encoding;
364
365
0
  rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
366
0
  if (rc != IDN2_OK || !input)
367
0
    return rc;
368
369
0
  encoding = locale_charset ();
370
0
  output_l8 =
371
0
    (uint8_t *) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
372
373
0
  if (!output_l8)
374
0
    {
375
0
      if (errno == ENOMEM)
376
0
  rc = IDN2_MALLOC;
377
0
      else
378
0
  rc = IDN2_ENCODING_ERROR;
379
380
0
      free (output_l8);
381
0
    }
382
0
  else
383
0
    {
384
0
      if (output)
385
0
  *output = (char *) output_l8;
386
0
      else
387
0
  free (output_l8);
388
389
0
      rc = IDN2_OK;
390
0
    }
391
392
0
  free (output_u8);
393
394
0
  return rc;
395
0
}
396
397
/**
398
 * idn2_to_unicode_lzlz:
399
 * @input: Input zero-terminated string encoded in the current locale's character set.
400
 * @output: Newly allocated output string in current locale's character set.
401
 * @flags: Currently unused.
402
 *
403
 * Converts a possibly ACE encoded domain name in the locale's character
404
 * set into a string encoded in the current locale's character set (punycode
405
 * decoding). The output buffer will be zero-terminated and must be
406
 * deallocated by the caller.
407
 *
408
 * @output may be NULL to test lookup of @input without allocating memory.
409
 *
410
 * Returns:
411
 *   %IDN2_OK: The conversion was successful.
412
 *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
413
 *   %IDN2_TOO_BIG_LABEL: A label is would have been too long.
414
 *   %IDN2_ENCODING_ERROR: Output character conversion failed.
415
 *   %IDN2_ICONV_FAIL: Input character conversion failed.
416
 *   %IDN2_MALLOC: Memory allocation failed.
417
 *
418
 * Since: 2.0.0
419
 **/
420
int
421
idn2_to_unicode_lzlz (const char *input, char **output, int flags)
422
0
{
423
0
  uint8_t *input_l8;
424
0
  const char *encoding;
425
0
  int rc;
426
427
0
  if (!input)
428
0
    {
429
0
      if (output)
430
0
  *output = NULL;
431
0
      return IDN2_OK;
432
0
    }
433
434
0
  encoding = locale_charset ();
435
0
  input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
436
437
0
  if (!input_l8)
438
0
    {
439
0
      if (errno == ENOMEM)
440
0
  return IDN2_MALLOC;
441
0
      return IDN2_ICONV_FAIL;
442
0
    }
443
444
0
  rc = idn2_to_unicode_8zlz ((char *) input_l8, output, flags);
445
0
  free (input_l8);
446
447
0
  return rc;
448
0
}