Coverage Report

Created: 2026-03-31 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
4.85k
{
57
4.85k
  int lastCanonicalClass = 0;
58
4.85k
  int result = 1;
59
4.85k
  size_t it;
60
61
43.9k
  for (it = 0; it < len; it++)
62
39.9k
    {
63
39.9k
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
39.9k
      if (ch >= 0x10000)
67
1.90k
  it++;
68
69
39.9k
      int canonicalClass = uc_combining_class (ch);
70
39.9k
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
81
  return 0;
72
73
39.8k
      NFCQCMap *map = get_nfcqc_map (ch);
74
39.8k
      if (map)
75
720
  {
76
720
    if (map->check)
77
720
      return 0;
78
0
    result = -1;
79
0
  }
80
81
39.1k
      lastCanonicalClass = canonicalClass;
82
39.1k
    }
83
84
4.05k
  return result;
85
4.85k
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
21.0k
{
91
21.0k
  uint32_t *p;
92
21.0k
  size_t plen;
93
94
21.0k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
21.0k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
21.0k
  if (nfc && !_isNFC (p, plen))
103
801
    {
104
801
      size_t tmplen;
105
801
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
801
      free (p);
107
801
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
801
      p = tmp;
115
801
      plen = tmplen;
116
801
    }
117
118
21.0k
  *out = p;
119
21.0k
  *outlen = plen;
120
21.0k
  return IDN2_OK;
121
21.0k
}
122
123
/* Return true if none of the SRCLEN bytes starting at SRC has its high
   bit set (i.e. every byte is below 0x80), false otherwise.  An empty
   range yields true.  */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  const uint8_t *cursor = src;
  size_t remaining = srclen;

  while (remaining > 0)
    {
      if ((*cursor & 0x80) != 0)
        return false;
      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
49.4k
{
138
49.4k
  if (what & TEST_NFC)
139
49.4k
    {
140
49.4k
      size_t plen;
141
49.4k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
49.4k
           NULL, &plen);
143
49.4k
      int ok;
144
49.4k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
49.4k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
49.4k
      free (p);
152
49.4k
      if (!ok)
153
2.25k
  return IDN2_NOT_NFC;
154
49.4k
    }
155
156
47.2k
  if (what & TEST_2HYPHEN)
157
47.2k
    {
158
47.2k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
408
  return IDN2_2HYPHEN;
160
47.2k
    }
161
162
46.8k
  if (what & TEST_HYPHEN_STARTEND)
163
33.9k
    {
164
33.9k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
1.09k
  return IDN2_HYPHEN_STARTEND;
166
33.9k
    }
167
168
45.7k
  if (what & TEST_LEADING_COMBINING)
169
45.7k
    {
170
45.7k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
1.14k
  return IDN2_LEADING_COMBINING;
172
45.7k
    }
173
174
44.5k
  if (what & TEST_DISALLOWED)
175
14.4k
    {
176
14.4k
      size_t i;
177
140k
      for (i = 0; i < llen; i++)
178
128k
  if (_idn2_disallowed_p (label[i]))
179
17.7k
    {
180
17.7k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
17.3k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
16.9k
        {
183
16.9k
    IDNAMap map;
184
16.9k
    get_idna_map (label[i], &map);
185
16.9k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
2.71k
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
15.7k
      continue;
188
189
16.9k
        }
190
191
1.99k
      return IDN2_DISALLOWED;
192
17.7k
    }
193
14.4k
    }
194
195
42.5k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
42.5k
  if (what & TEST_CONTEXTJ_RULE)
204
12.4k
    {
205
12.4k
      size_t i;
206
12.4k
      int rc;
207
208
128k
      for (i = 0; i < llen; i++)
209
117k
  {
210
117k
    rc = _idn2_contextj_rule (label, llen, i);
211
117k
    if (rc != IDN2_OK)
212
964
      return rc;
213
117k
  }
214
12.4k
    }
215
216
41.6k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
41.6k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
10.4k
    {
226
10.4k
      size_t i;
227
110k
      for (i = 0; i < llen; i++)
228
100k
  if (_idn2_contexto_p (label[i])
229
6.27k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
10.4k
    }
232
233
41.6k
  if (what & TEST_CONTEXTO_RULE)
234
1.06k
    {
235
1.06k
      size_t i;
236
1.06k
      int rc;
237
238
7.83k
      for (i = 0; i < llen; i++)
239
6.91k
  {
240
6.91k
    rc = _idn2_contexto_rule (label, llen, i);
241
6.91k
    if (rc != IDN2_OK)
242
147
      return rc;
243
6.91k
  }
244
1.06k
    }
245
246
41.4k
  if (what & TEST_UNASSIGNED)
247
11.3k
    {
248
11.3k
      size_t i;
249
117k
      for (i = 0; i < llen; i++)
250
106k
  if (_idn2_unassigned_p (label[i]))
251
149
    return IDN2_UNASSIGNED;
252
11.3k
    }
253
254
41.3k
  if (what & TEST_BIDI)
255
11.2k
    {
256
11.2k
      int rc = _idn2_bidi (label, llen);
257
11.2k
      if (rc != IDN2_OK)
258
1.79k
  return rc;
259
11.2k
    }
260
261
39.5k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
38.8k
    {
263
38.8k
      size_t i;
264
38.8k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
296k
      for (i = 0; i < llen; i++)
268
257k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
291k
      for (i = 0; i < llen; i++)
276
255k
  {
277
255k
    IDNAMap map;
278
279
255k
    get_idna_map (label[i], &map);
280
281
255k
    if (map_is (&map, TR46_FLG_VALID) ||
282
41.9k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
221k
      continue;
284
285
33.8k
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
32.7k
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
4.65k
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
30.7k
      continue;
289
290
3.07k
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
3.07k
      IDN2_INVALID_NONTRANSITIONAL;
292
33.8k
  }
293
38.8k
    }
294
295
36.4k
  return IDN2_OK;
296
39.5k
}