Coverage Report

Created: 2026-02-14 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
4.62k
{
57
4.62k
  int lastCanonicalClass = 0;
58
4.62k
  int result = 1;
59
4.62k
  size_t it;
60
61
45.6k
  for (it = 0; it < len; it++)
62
42.1k
    {
63
42.1k
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
42.1k
      if (ch >= 0x10000)
67
1.85k
  it++;
68
69
42.1k
      int canonicalClass = uc_combining_class (ch);
70
42.1k
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
85
  return 0;
72
73
42.0k
      NFCQCMap *map = get_nfcqc_map (ch);
74
42.0k
      if (map)
75
1.02k
  {
76
1.02k
    if (map->check)
77
1.02k
      return 0;
78
0
    result = -1;
79
0
  }
80
81
40.9k
      lastCanonicalClass = canonicalClass;
82
40.9k
    }
83
84
3.51k
  return result;
85
4.62k
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
4.62k
{
91
4.62k
  uint32_t *p;
92
4.62k
  size_t plen;
93
94
4.62k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
4.62k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
4.62k
  if (nfc && !_isNFC (p, plen))
103
1.11k
    {
104
1.11k
      size_t tmplen;
105
1.11k
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
1.11k
      free (p);
107
1.11k
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
1.11k
      p = tmp;
115
1.11k
      plen = tmplen;
116
1.11k
    }
117
118
4.62k
  *out = p;
119
4.62k
  *outlen = plen;
120
4.62k
  return IDN2_OK;
121
4.62k
}
122
123
/*
 * Report whether the SRCLEN bytes starting at SRC are all 7-bit
 * (below 0x80).  An empty range yields true.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  /* Walk the buffer by consuming the remaining-byte count; stop at
     the first byte with the high bit range reached. */
  while (srclen-- > 0)
    {
      if (*src++ >= 0x80)
        return false;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
10.2k
{
138
10.2k
  if (what & TEST_NFC)
139
10.2k
    {
140
10.2k
      size_t plen;
141
10.2k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
10.2k
           NULL, &plen);
143
10.2k
      int ok;
144
10.2k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
10.2k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
10.2k
      free (p);
152
10.2k
      if (!ok)
153
162
  return IDN2_NOT_NFC;
154
10.2k
    }
155
156
10.0k
  if (what & TEST_2HYPHEN)
157
10.0k
    {
158
10.0k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
66
  return IDN2_2HYPHEN;
160
10.0k
    }
161
162
9.97k
  if (what & TEST_HYPHEN_STARTEND)
163
7.00k
    {
164
7.00k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
184
  return IDN2_HYPHEN_STARTEND;
166
7.00k
    }
167
168
9.79k
  if (what & TEST_LEADING_COMBINING)
169
9.79k
    {
170
9.79k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
345
  return IDN2_LEADING_COMBINING;
172
9.79k
    }
173
174
9.45k
  if (what & TEST_DISALLOWED)
175
2.97k
    {
176
2.97k
      size_t i;
177
33.1k
      for (i = 0; i < llen; i++)
178
30.9k
  if (_idn2_disallowed_p (label[i]))
179
775
    {
180
775
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
775
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
0
        {
183
0
    IDNAMap map;
184
0
    get_idna_map (label[i], &map);
185
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
0
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
0
      continue;
188
189
0
        }
190
191
775
      return IDN2_DISALLOWED;
192
775
    }
193
2.97k
    }
194
195
8.67k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
8.67k
  if (what & TEST_CONTEXTJ_RULE)
204
2.19k
    {
205
2.19k
      size_t i;
206
2.19k
      int rc;
207
208
26.4k
      for (i = 0; i < llen; i++)
209
24.3k
  {
210
24.3k
    rc = _idn2_contextj_rule (label, llen, i);
211
24.3k
    if (rc != IDN2_OK)
212
118
      return rc;
213
24.3k
  }
214
2.19k
    }
215
216
8.55k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
8.55k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
2.07k
    {
226
2.07k
      size_t i;
227
23.9k
      for (i = 0; i < llen; i++)
228
21.8k
  if (_idn2_contexto_p (label[i])
229
5.77k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
2.07k
    }
232
233
8.55k
  if (what & TEST_CONTEXTO_RULE)
234
0
    {
235
0
      size_t i;
236
0
      int rc;
237
238
0
      for (i = 0; i < llen; i++)
239
0
  {
240
0
    rc = _idn2_contexto_rule (label, llen, i);
241
0
    if (rc != IDN2_OK)
242
0
      return rc;
243
0
  }
244
0
    }
245
246
8.55k
  if (what & TEST_UNASSIGNED)
247
2.07k
    {
248
2.07k
      size_t i;
249
23.9k
      for (i = 0; i < llen; i++)
250
21.8k
  if (_idn2_unassigned_p (label[i]))
251
0
    return IDN2_UNASSIGNED;
252
2.07k
    }
253
254
8.55k
  if (what & TEST_BIDI)
255
2.07k
    {
256
2.07k
      int rc = _idn2_bidi (label, llen);
257
2.07k
      if (rc != IDN2_OK)
258
165
  return rc;
259
2.07k
    }
260
261
8.39k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
8.39k
    {
263
8.39k
      size_t i;
264
8.39k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
85.2k
      for (i = 0; i < llen; i++)
268
76.8k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
84.3k
      for (i = 0; i < llen; i++)
276
76.2k
  {
277
76.2k
    IDNAMap map;
278
279
76.2k
    get_idna_map (label[i], &map);
280
281
76.2k
    if (map_is (&map, TR46_FLG_VALID) ||
282
2.15k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
75.9k
      continue;
284
285
313
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
0
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
0
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
0
      continue;
289
290
313
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
313
      IDN2_INVALID_NONTRANSITIONAL;
292
313
  }
293
8.39k
    }
294
295
8.07k
  return IDN2_OK;
296
8.39k
}