Coverage Report

Created: 2025-01-28 06:58

/src/libidn2/lib/idna.c
Line
Count
Source (jump to first uncovered line)
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2024 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
0
{
57
0
  int lastCanonicalClass = 0;
58
0
  int result = 1;
59
0
  size_t it;
60
61
0
  for (it = 0; it < len; it++)
62
0
    {
63
0
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
0
      if (ch >= 0x10000)
67
0
  it++;
68
69
0
      int canonicalClass = uc_combining_class (ch);
70
0
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
0
  return 0;
72
73
0
      NFCQCMap *map = get_nfcqc_map (ch);
74
0
      if (map)
75
0
  {
76
0
    if (map->check)
77
0
      return 0;
78
0
    result = -1;
79
0
  }
80
81
0
      lastCanonicalClass = canonicalClass;
82
0
    }
83
84
0
  return result;
85
0
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
24.5k
{
91
24.5k
  uint32_t *p;
92
24.5k
  size_t plen;
93
94
24.5k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
24.5k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
24.5k
  if (nfc && !_isNFC (p, plen))
103
0
    {
104
0
      size_t tmplen;
105
0
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
0
      free (p);
107
0
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
0
      p = tmp;
115
0
      plen = tmplen;
116
0
    }
117
118
24.5k
  *out = p;
119
24.5k
  *outlen = plen;
120
24.5k
  return IDN2_OK;
121
24.5k
}
122
123
/*
 * Report whether the SRCLEN bytes starting at SRC are all below 0x80.
 * An empty range yields true.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  size_t remaining = srclen;
  const uint8_t *cursor = src;

  while (remaining > 0)
    {
      /* Any byte with the top bit set is outside the ASCII range.  */
      if ((*cursor & 0x80) != 0)
        return false;
      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
47.7k
{
138
47.7k
  if (what & TEST_NFC)
139
47.7k
    {
140
47.7k
      size_t plen;
141
47.7k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
47.7k
           NULL, &plen);
143
47.7k
      int ok;
144
47.7k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
47.7k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
47.7k
      free (p);
152
47.7k
      if (!ok)
153
1.29k
  return IDN2_NOT_NFC;
154
47.7k
    }
155
156
46.4k
  if (what & TEST_2HYPHEN)
157
46.4k
    {
158
46.4k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
700
  return IDN2_2HYPHEN;
160
46.4k
    }
161
162
45.7k
  if (what & TEST_HYPHEN_STARTEND)
163
30.6k
    {
164
30.6k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
1.17k
  return IDN2_HYPHEN_STARTEND;
166
30.6k
    }
167
168
44.5k
  if (what & TEST_LEADING_COMBINING)
169
44.5k
    {
170
44.5k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
856
  return IDN2_LEADING_COMBINING;
172
44.5k
    }
173
174
43.6k
  if (what & TEST_DISALLOWED)
175
15.0k
    {
176
15.0k
      size_t i;
177
106k
      for (i = 0; i < llen; i++)
178
94.1k
  if (_idn2_disallowed_p (label[i]))
179
3.20k
    {
180
3.20k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
3.20k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
0
        {
183
0
    IDNAMap map;
184
0
    get_idna_map (label[i], &map);
185
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
0
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
0
      continue;
188
189
0
        }
190
191
3.20k
      return IDN2_DISALLOWED;
192
3.20k
    }
193
15.0k
    }
194
195
40.4k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
40.4k
  if (what & TEST_CONTEXTJ_RULE)
204
11.8k
    {
205
11.8k
      size_t i;
206
11.8k
      int rc;
207
208
94.6k
      for (i = 0; i < llen; i++)
209
84.6k
  {
210
84.6k
    rc = _idn2_contextj_rule (label, llen, i);
211
84.6k
    if (rc != IDN2_OK)
212
1.88k
      return rc;
213
84.6k
  }
214
11.8k
    }
215
216
38.5k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
38.5k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
10.0k
    {
226
10.0k
      size_t i;
227
89.2k
      for (i = 0; i < llen; i++)
228
79.2k
  if (_idn2_contexto_p (label[i])
229
79.2k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
10.0k
    }
232
233
38.5k
  if (what & TEST_CONTEXTO_RULE)
234
0
    {
235
0
      size_t i;
236
0
      int rc;
237
238
0
      for (i = 0; i < llen; i++)
239
0
  {
240
0
    rc = _idn2_contexto_rule (label, llen, i);
241
0
    if (rc != IDN2_OK)
242
0
      return rc;
243
0
  }
244
0
    }
245
246
38.5k
  if (what & TEST_UNASSIGNED)
247
10.0k
    {
248
10.0k
      size_t i;
249
89.2k
      for (i = 0; i < llen; i++)
250
79.2k
  if (_idn2_unassigned_p (label[i]))
251
0
    return IDN2_UNASSIGNED;
252
10.0k
    }
253
254
38.5k
  if (what & TEST_BIDI)
255
10.0k
    {
256
10.0k
      int rc = _idn2_bidi (label, llen);
257
10.0k
      if (rc != IDN2_OK)
258
1.97k
  return rc;
259
10.0k
    }
260
261
36.6k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
36.6k
    {
263
36.6k
      size_t i;
264
36.6k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
349k
      for (i = 0; i < llen; i++)
268
313k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
348k
      for (i = 0; i < llen; i++)
276
312k
  {
277
312k
    IDNAMap map;
278
279
312k
    get_idna_map (label[i], &map);
280
281
312k
    if (map_is (&map, TR46_FLG_VALID) ||
282
312k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
311k
      continue;
284
285
801
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
801
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
0
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
0
      continue;
289
290
801
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
801
      IDN2_INVALID_NONTRANSITIONAL;
292
801
  }
293
36.6k
    }
294
295
35.8k
  return IDN2_OK;
296
36.6k
}