Coverage Report

Created: 2024-03-08 06:32

/src/libidn2/lib/idna.c
Line
Count
Source (jump to first uncovered line)
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2024 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
0
{
57
0
  int lastCanonicalClass = 0;
58
0
  int result = 1;
59
0
  size_t it;
60
61
0
  for (it = 0; it < len; it++)
62
0
    {
63
0
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
0
      if (ch >= 0x10000)
67
0
  it++;
68
69
0
      int canonicalClass = uc_combining_class (ch);
70
0
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
0
  return 0;
72
73
0
      NFCQCMap *map = get_nfcqc_map (ch);
74
0
      if (map)
75
0
  {
76
0
    if (map->check)
77
0
      return 0;
78
0
    result = -1;
79
0
  }
80
81
0
      lastCanonicalClass = canonicalClass;
82
0
    }
83
84
0
  return result;
85
0
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
17.2k
{
91
17.2k
  uint32_t *p;
92
17.2k
  size_t plen;
93
94
17.2k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
17.2k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
17.2k
  if (nfc && !_isNFC (p, plen))
103
0
    {
104
0
      size_t tmplen;
105
0
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
0
      free (p);
107
0
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
0
      p = tmp;
115
0
      plen = tmplen;
116
0
    }
117
118
17.2k
  *out = p;
119
17.2k
  *outlen = plen;
120
17.2k
  return IDN2_OK;
121
17.2k
}
122
123
/*
 * Return true when none of the first SRCLEN bytes of SRC has its high
 * bit set (i.e. every byte is below 0x80), false otherwise.  An empty
 * range yields true and SRC is not dereferenced in that case.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  const uint8_t *cursor = src;
  size_t remaining = srclen;

  while (remaining > 0)
    {
      if (*cursor > 0x7F)
        return false;
      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
34.5k
{
138
34.5k
  if (what & TEST_NFC)
139
34.5k
    {
140
34.5k
      size_t plen;
141
34.5k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
34.5k
           NULL, &plen);
143
34.5k
      int ok;
144
34.5k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
34.5k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
34.5k
      free (p);
152
34.5k
      if (!ok)
153
1.20k
  return IDN2_NOT_NFC;
154
34.5k
    }
155
156
33.3k
  if (what & TEST_2HYPHEN)
157
33.3k
    {
158
33.3k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
220
  return IDN2_2HYPHEN;
160
33.3k
    }
161
162
33.1k
  if (what & TEST_HYPHEN_STARTEND)
163
23.4k
    {
164
23.4k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
664
  return IDN2_HYPHEN_STARTEND;
166
23.4k
    }
167
168
32.4k
  if (what & TEST_LEADING_COMBINING)
169
32.4k
    {
170
32.4k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
823
  return IDN2_LEADING_COMBINING;
172
32.4k
    }
173
174
31.6k
  if (what & TEST_DISALLOWED)
175
9.65k
    {
176
9.65k
      size_t i;
177
60.3k
      for (i = 0; i < llen; i++)
178
53.1k
  if (_idn2_disallowed_p (label[i]))
179
2.48k
    {
180
2.48k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
2.48k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
0
        {
183
0
    IDNAMap map;
184
0
    get_idna_map (label[i], &map);
185
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
0
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
0
      continue;
188
189
0
        }
190
191
2.48k
      return IDN2_DISALLOWED;
192
2.48k
    }
193
9.65k
    }
194
195
29.1k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
29.1k
  if (what & TEST_CONTEXTJ_RULE)
204
7.17k
    {
205
7.17k
      size_t i;
206
7.17k
      int rc;
207
208
51.6k
      for (i = 0; i < llen; i++)
209
44.8k
  {
210
44.8k
    rc = _idn2_contextj_rule (label, llen, i);
211
44.8k
    if (rc != IDN2_OK)
212
377
      return rc;
213
44.8k
  }
214
7.17k
    }
215
216
28.7k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
28.7k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
6.79k
    {
226
6.79k
      size_t i;
227
46.4k
      for (i = 0; i < llen; i++)
228
39.6k
  if (_idn2_contexto_p (label[i])
229
39.6k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
6.79k
    }
232
233
28.7k
  if (what & TEST_CONTEXTO_RULE)
234
0
    {
235
0
      size_t i;
236
0
      int rc;
237
238
0
      for (i = 0; i < llen; i++)
239
0
  {
240
0
    rc = _idn2_contexto_rule (label, llen, i);
241
0
    if (rc != IDN2_OK)
242
0
      return rc;
243
0
  }
244
0
    }
245
246
28.7k
  if (what & TEST_UNASSIGNED)
247
6.79k
    {
248
6.79k
      size_t i;
249
46.4k
      for (i = 0; i < llen; i++)
250
39.6k
  if (_idn2_unassigned_p (label[i]))
251
0
    return IDN2_UNASSIGNED;
252
6.79k
    }
253
254
28.7k
  if (what & TEST_BIDI)
255
6.79k
    {
256
6.79k
      int rc = _idn2_bidi (label, llen);
257
6.79k
      if (rc != IDN2_OK)
258
814
  return rc;
259
6.79k
    }
260
261
27.9k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
27.9k
    {
263
27.9k
      size_t i;
264
27.9k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
198k
      for (i = 0; i < llen; i++)
268
170k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
196k
      for (i = 0; i < llen; i++)
276
169k
  {
277
169k
    IDNAMap map;
278
279
169k
    get_idna_map (label[i], &map);
280
281
169k
    if (map_is (&map, TR46_FLG_VALID) ||
282
169k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
168k
      continue;
284
285
918
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
918
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
0
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
0
      continue;
289
290
918
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
918
      IDN2_INVALID_NONTRANSITIONAL;
292
918
  }
293
27.9k
    }
294
295
27.0k
  return IDN2_OK;
296
27.9k
}