Coverage Report

Created: 2026-03-31 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
0
{
57
0
  int lastCanonicalClass = 0;
58
0
  int result = 1;
59
0
  size_t it;
60
61
0
  for (it = 0; it < len; it++)
62
0
    {
63
0
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
0
      if (ch >= 0x10000)
67
0
  it++;
68
69
0
      int canonicalClass = uc_combining_class (ch);
70
0
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
0
  return 0;
72
73
0
      NFCQCMap *map = get_nfcqc_map (ch);
74
0
      if (map)
75
0
  {
76
0
    if (map->check)
77
0
      return 0;
78
0
    result = -1;
79
0
  }
80
81
0
      lastCanonicalClass = canonicalClass;
82
0
    }
83
84
0
  return result;
85
0
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
1.53M
{
91
1.53M
  uint32_t *p;
92
1.53M
  size_t plen;
93
94
1.53M
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
1.53M
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
1.53M
  if (nfc && !_isNFC (p, plen))
103
0
    {
104
0
      size_t tmplen;
105
0
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
0
      free (p);
107
0
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
0
      p = tmp;
115
0
      plen = tmplen;
116
0
    }
117
118
1.53M
  *out = p;
119
1.53M
  *outlen = plen;
120
1.53M
  return IDN2_OK;
121
1.53M
}
122
123
/* Return true when none of the SRCLEN bytes starting at SRC has its
   high bit set (i.e. every byte is below 0x80), false otherwise.  An
   empty range yields true and SRC is not dereferenced.  */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  size_t remaining;

  for (remaining = srclen; remaining > 0; remaining--, src++)
    {
      if (*src > 0x7F)
        return false;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
3.26M
{
138
3.26M
  if (what & TEST_NFC)
139
3.26M
    {
140
3.26M
      size_t plen;
141
3.26M
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
3.26M
           NULL, &plen);
143
3.26M
      int ok;
144
3.26M
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
3.26M
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
3.26M
      free (p);
152
3.26M
      if (!ok)
153
448
  return IDN2_NOT_NFC;
154
3.26M
    }
155
156
3.26M
  if (what & TEST_2HYPHEN)
157
3.26M
    {
158
3.26M
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
1.01k
  return IDN2_2HYPHEN;
160
3.26M
    }
161
162
3.26M
  if (what & TEST_HYPHEN_STARTEND)
163
1.72M
    {
164
1.72M
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
658
  return IDN2_HYPHEN_STARTEND;
166
1.72M
    }
167
168
3.26M
  if (what & TEST_LEADING_COMBINING)
169
3.26M
    {
170
3.26M
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
2.51k
  return IDN2_LEADING_COMBINING;
172
3.26M
    }
173
174
3.25M
  if (what & TEST_DISALLOWED)
175
1.53M
    {
176
1.53M
      size_t i;
177
3.33M
      for (i = 0; i < llen; i++)
178
1.80M
  if (_idn2_disallowed_p (label[i]))
179
21.1k
    {
180
21.1k
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
21.1k
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
21.1k
        {
183
21.1k
    IDNAMap map;
184
21.1k
    get_idna_map (label[i], &map);
185
21.1k
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
4.31k
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
18.0k
      continue;
188
189
21.1k
        }
190
191
3.07k
      return IDN2_DISALLOWED;
192
21.1k
    }
193
1.53M
    }
194
195
3.25M
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
3.25M
  if (what & TEST_CONTEXTJ_RULE)
204
1.53M
    {
205
1.53M
      size_t i;
206
1.53M
      int rc;
207
208
3.33M
      for (i = 0; i < llen; i++)
209
1.79M
  {
210
1.79M
    rc = _idn2_contextj_rule (label, llen, i);
211
1.79M
    if (rc != IDN2_OK)
212
2.17k
      return rc;
213
1.79M
  }
214
1.53M
    }
215
216
3.25M
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
3.25M
  if (what & TEST_CONTEXTO_WITH_RULE)
225
1.53M
    {
226
1.53M
      size_t i;
227
3.32M
      for (i = 0; i < llen; i++)
228
1.79M
  if (_idn2_contexto_p (label[i])
229
280k
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
1.53M
    }
232
233
3.25M
  if (what & TEST_CONTEXTO_RULE)
234
0
    {
235
0
      size_t i;
236
0
      int rc;
237
238
0
      for (i = 0; i < llen; i++)
239
0
  {
240
0
    rc = _idn2_contexto_rule (label, llen, i);
241
0
    if (rc != IDN2_OK)
242
0
      return rc;
243
0
  }
244
0
    }
245
246
3.25M
  if (what & TEST_UNASSIGNED)
247
1.53M
    {
248
1.53M
      size_t i;
249
3.32M
      for (i = 0; i < llen; i++)
250
1.79M
  if (_idn2_unassigned_p (label[i]))
251
0
    return IDN2_UNASSIGNED;
252
1.53M
    }
253
254
3.25M
  if (what & TEST_BIDI)
255
1.53M
    {
256
1.53M
      int rc = _idn2_bidi (label, llen);
257
1.53M
      if (rc != IDN2_OK)
258
5.82k
  return rc;
259
1.53M
    }
260
261
3.24M
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
3.24M
    {
263
3.24M
      size_t i;
264
3.24M
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
6.95M
      for (i = 0; i < llen; i++)
268
3.70M
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
6.95M
      for (i = 0; i < llen; i++)
276
3.70M
  {
277
3.70M
    IDNAMap map;
278
279
3.70M
    get_idna_map (label[i], &map);
280
281
3.70M
    if (map_is (&map, TR46_FLG_VALID) ||
282
39.4k
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
3.67M
      continue;
284
285
32.9k
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
32.9k
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
3.04k
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
32.3k
      continue;
289
290
567
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
567
      IDN2_INVALID_NONTRANSITIONAL;
292
32.9k
  }
293
3.24M
    }
294
295
3.24M
  return IDN2_OK;
296
3.24M
}