Coverage Report

Created: 2025-10-10 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
4.58k
{
57
4.58k
  int lastCanonicalClass = 0;
58
4.58k
  int result = 1;
59
4.58k
  size_t it;
60
61
42.8k
  for (it = 0; it < len; it++)
62
39.0k
    {
63
39.0k
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
39.0k
      if (ch >= 0x10000)
67
1.56k
  it++;
68
69
39.0k
      int canonicalClass = uc_combining_class (ch);
70
39.0k
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
86
  return 0;
72
73
38.9k
      NFCQCMap *map = get_nfcqc_map (ch);
74
38.9k
      if (map)
75
681
  {
76
681
    if (map->check)
77
681
      return 0;
78
0
    result = -1;
79
0
  }
80
81
38.2k
      lastCanonicalClass = canonicalClass;
82
38.2k
    }
83
84
3.82k
  return result;
85
4.58k
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
19.9k
{
91
19.9k
  uint32_t *p;
92
19.9k
  size_t plen;
93
94
19.9k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
19.9k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
19.9k
  if (nfc && !_isNFC (p, plen))
103
767
    {
104
767
      size_t tmplen;
105
767
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
767
      free (p);
107
767
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
767
      p = tmp;
115
767
      plen = tmplen;
116
767
    }
117
118
19.9k
  *out = p;
119
19.9k
  *outlen = plen;
120
19.9k
  return IDN2_OK;
121
19.9k
}
122
123
/*
 * Tell whether the SRCLEN bytes starting at SRC are all 7-bit values.
 *
 * Returns true when no byte has its top bit set (this includes the
 * empty range), false as soon as one byte >= 0x80 is found.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  const uint8_t *cursor = src;
  size_t remaining = srclen;

  while (remaining > 0)
    {
      /* For an 8-bit value, "top bit set" is the same as ">= 0x80".  */
      if ((*cursor & 0x80) != 0)
        return false;

      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
46.2k
{
138
46.2k
  if (what & TEST_NFC)
139
46.2k
    {
140
46.2k
      size_t plen;
141
46.2k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
46.2k
           NULL, &plen);
143
46.2k
      int ok;
144
46.2k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
46.2k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
46.2k
      free (p);
152
46.2k
      if (!ok)
153
2.19k
  return IDN2_NOT_NFC;
154
46.2k
    }
155
156
44.0k
  if (what & TEST_2HYPHEN)
157
44.0k
    {
158
44.0k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
406
  return IDN2_2HYPHEN;
160
44.0k
    }
161
162
43.6k
  if (what & TEST_HYPHEN_STARTEND)
163
31.4k
    {
164
31.4k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
1.02k
  return IDN2_HYPHEN_STARTEND;
166
31.4k
    }
167
168
42.6k
  if (what & TEST_LEADING_COMBINING)
169
42.6k
    {
170
42.6k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
1.15k
  return IDN2_LEADING_COMBINING;
172
42.6k
    }
173
174
41.4k
  if (what & TEST_DISALLOWED)
175
13.7k
    {
176
13.7k
      size_t i;
177
134k
      for (i = 0; i < llen; i++)
178
122k
  if (_idn2_disallowed_p (label[i]))
179
16.8k
    {
180
16.8k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
16.4k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
16.0k
        {
183
16.0k
    IDNAMap map;
184
16.0k
    get_idna_map (label[i], &map);
185
16.0k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
2.73k
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
14.8k
      continue;
188
189
16.0k
        }
190
191
1.95k
      return IDN2_DISALLOWED;
192
16.8k
    }
193
13.7k
    }
194
195
39.4k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
39.4k
  if (what & TEST_CONTEXTJ_RULE)
204
11.7k
    {
205
11.7k
      size_t i;
206
11.7k
      int rc;
207
208
123k
      for (i = 0; i < llen; i++)
209
112k
  {
210
112k
    rc = _idn2_contextj_rule (label, llen, i);
211
112k
    if (rc != IDN2_OK)
212
986
      return rc;
213
112k
  }
214
11.7k
    }
215
216
38.5k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
38.5k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
9.76k
    {
226
9.76k
      size_t i;
227
106k
      for (i = 0; i < llen; i++)
228
96.7k
  if (_idn2_contexto_p (label[i])
229
6.09k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
9.76k
    }
232
233
38.5k
  if (what & TEST_CONTEXTO_RULE)
234
1.00k
    {
235
1.00k
      size_t i;
236
1.00k
      int rc;
237
238
7.12k
      for (i = 0; i < llen; i++)
239
6.25k
  {
240
6.25k
    rc = _idn2_contexto_rule (label, llen, i);
241
6.25k
    if (rc != IDN2_OK)
242
131
      return rc;
243
6.25k
  }
244
1.00k
    }
245
246
38.3k
  if (what & TEST_UNASSIGNED)
247
10.6k
    {
248
10.6k
      size_t i;
249
112k
      for (i = 0; i < llen; i++)
250
102k
  if (_idn2_unassigned_p (label[i]))
251
147
    return IDN2_UNASSIGNED;
252
10.6k
    }
253
254
38.2k
  if (what & TEST_BIDI)
255
10.4k
    {
256
10.4k
      int rc = _idn2_bidi (label, llen);
257
10.4k
      if (rc != IDN2_OK)
258
1.68k
  return rc;
259
10.4k
    }
260
261
36.5k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
35.9k
    {
263
35.9k
      size_t i;
264
35.9k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
282k
      for (i = 0; i < llen; i++)
268
246k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
277k
      for (i = 0; i < llen; i++)
276
244k
  {
277
244k
    IDNAMap map;
278
279
244k
    get_idna_map (label[i], &map);
280
281
244k
    if (map_is (&map, TR46_FLG_VALID) ||
282
38.9k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
213k
      continue;
284
285
31.3k
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
30.3k
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
4.59k
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
28.4k
      continue;
289
290
2.90k
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
2.90k
      IDN2_INVALID_NONTRANSITIONAL;
292
31.3k
  }
293
35.9k
    }
294
295
33.6k
  return IDN2_OK;
296
36.5k
}