Coverage Report

Created: 2026-03-31 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
0
{
57
0
  int lastCanonicalClass = 0;
58
0
  int result = 1;
59
0
  size_t it;
60
61
0
  for (it = 0; it < len; it++)
62
0
    {
63
0
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
0
      if (ch >= 0x10000)
67
0
  it++;
68
69
0
      int canonicalClass = uc_combining_class (ch);
70
0
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
0
  return 0;
72
73
0
      NFCQCMap *map = get_nfcqc_map (ch);
74
0
      if (map)
75
0
  {
76
0
    if (map->check)
77
0
      return 0;
78
0
    result = -1;
79
0
  }
80
81
0
      lastCanonicalClass = canonicalClass;
82
0
    }
83
84
0
  return result;
85
0
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
65.7k
{
91
65.7k
  uint32_t *p;
92
65.7k
  size_t plen;
93
94
65.7k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
65.7k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
65.7k
  if (nfc && !_isNFC (p, plen))
103
0
    {
104
0
      size_t tmplen;
105
0
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
0
      free (p);
107
0
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
0
      p = tmp;
115
0
      plen = tmplen;
116
0
    }
117
118
65.7k
  *out = p;
119
65.7k
  *outlen = plen;
120
65.7k
  return IDN2_OK;
121
65.7k
}
122
123
/* Report whether all SRCLEN bytes starting at SRC are below 0x80.
   An empty range yields true.  */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  const uint8_t *cursor = src;
  size_t remaining = srclen;

  while (remaining > 0)
    {
      /* For an 8-bit value, having the top bit set is the same as
         being >= 0x80.  */
      if (*cursor & 0x80)
        return false;
      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
130k
{
138
130k
  if (what & TEST_NFC)
139
130k
    {
140
130k
      size_t plen;
141
130k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
130k
           NULL, &plen);
143
130k
      int ok;
144
130k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
130k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
130k
      free (p);
152
130k
      if (!ok)
153
3.74k
  return IDN2_NOT_NFC;
154
130k
    }
155
156
126k
  if (what & TEST_2HYPHEN)
157
126k
    {
158
126k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
1.05k
  return IDN2_2HYPHEN;
160
126k
    }
161
162
125k
  if (what & TEST_HYPHEN_STARTEND)
163
89.7k
    {
164
89.7k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
3.32k
  return IDN2_HYPHEN_STARTEND;
166
89.7k
    }
167
168
122k
  if (what & TEST_LEADING_COMBINING)
169
122k
    {
170
122k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
1.76k
  return IDN2_LEADING_COMBINING;
172
122k
    }
173
174
120k
  if (what & TEST_DISALLOWED)
175
35.9k
    {
176
35.9k
      size_t i;
177
223k
      for (i = 0; i < llen; i++)
178
196k
  if (_idn2_disallowed_p (label[i]))
179
8.25k
    {
180
8.25k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
8.25k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
0
        {
183
0
    IDNAMap map;
184
0
    get_idna_map (label[i], &map);
185
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
0
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
0
      continue;
188
189
0
        }
190
191
8.25k
      return IDN2_DISALLOWED;
192
8.25k
    }
193
35.9k
    }
194
195
112k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
112k
  if (what & TEST_CONTEXTJ_RULE)
204
27.6k
    {
205
27.6k
      size_t i;
206
27.6k
      int rc;
207
208
193k
      for (i = 0; i < llen; i++)
209
169k
  {
210
169k
    rc = _idn2_contextj_rule (label, llen, i);
211
169k
    if (rc != IDN2_OK)
212
4.15k
      return rc;
213
169k
  }
214
27.6k
    }
215
216
108k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
108k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
23.5k
    {
226
23.5k
      size_t i;
227
174k
      for (i = 0; i < llen; i++)
228
151k
  if (_idn2_contexto_p (label[i])
229
22.6k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
23.5k
    }
232
233
108k
  if (what & TEST_CONTEXTO_RULE)
234
0
    {
235
0
      size_t i;
236
0
      int rc;
237
238
0
      for (i = 0; i < llen; i++)
239
0
  {
240
0
    rc = _idn2_contexto_rule (label, llen, i);
241
0
    if (rc != IDN2_OK)
242
0
      return rc;
243
0
  }
244
0
    }
245
246
108k
  if (what & TEST_UNASSIGNED)
247
23.5k
    {
248
23.5k
      size_t i;
249
174k
      for (i = 0; i < llen; i++)
250
151k
  if (_idn2_unassigned_p (label[i]))
251
0
    return IDN2_UNASSIGNED;
252
23.5k
    }
253
254
108k
  if (what & TEST_BIDI)
255
23.5k
    {
256
23.5k
      int rc = _idn2_bidi (label, llen);
257
23.5k
      if (rc != IDN2_OK)
258
4.18k
  return rc;
259
23.5k
    }
260
261
103k
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
103k
    {
263
103k
      size_t i;
264
103k
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
832k
      for (i = 0; i < llen; i++)
268
728k
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
817k
      for (i = 0; i < llen; i++)
276
717k
  {
277
717k
    IDNAMap map;
278
279
717k
    get_idna_map (label[i], &map);
280
281
717k
    if (map_is (&map, TR46_FLG_VALID) ||
282
29.0k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
713k
      continue;
284
285
4.88k
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
0
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
0
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
0
      continue;
289
290
4.88k
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
4.88k
      IDN2_INVALID_NONTRANSITIONAL;
292
4.88k
  }
293
103k
    }
294
295
99.0k
  return IDN2_OK;
296
103k
}