Coverage Report

Created: 2026-03-24 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/idna.c
Line
Count
Source
1
/* idna.c - implementation of high-level IDNA processing function
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include <stdlib.h>   /* free */
32
#include <errno.h>    /* errno */
33
34
#include "idn2.h"
35
#include "bidi.h"
36
#include "tables.h"
37
#include "context.h"
38
#include "tr46map.h"
39
40
#include <unitypes.h>
41
#include <unictype.h>   /* uc_is_general_category, UC_CATEGORY_M */
42
#include <uninorm.h>    /* u32_normalize */
43
#include <unistr.h>   /* u8_to_u32 */
44
45
#include "idna.h"
46
47
/*
48
 * NFC Quick Check from
49
 * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50
 *
51
 * They say, this is much faster than 'brute force' normalization.
52
 * Strings are very likely already in NFC form.
53
 */
54
G_GNUC_IDN2_ATTRIBUTE_PURE static int
55
_isNFC (uint32_t *label, size_t len)
56
0
{
57
0
  int lastCanonicalClass = 0;
58
0
  int result = 1;
59
0
  size_t it;
60
61
0
  for (it = 0; it < len; it++)
62
0
    {
63
0
      uint32_t ch = label[it];
64
65
      // supplementary code point
66
0
      if (ch >= 0x10000)
67
0
  it++;
68
69
0
      int canonicalClass = uc_combining_class (ch);
70
0
      if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71
0
  return 0;
72
73
0
      NFCQCMap *map = get_nfcqc_map (ch);
74
0
      if (map)
75
0
  {
76
0
    if (map->check)
77
0
      return 0;
78
0
    result = -1;
79
0
  }
80
81
0
      lastCanonicalClass = canonicalClass;
82
0
    }
83
84
0
  return result;
85
0
}
86
87
int
88
_idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89
         uint32_t **out, size_t *outlen, int nfc)
90
2.02k
{
91
2.02k
  uint32_t *p;
92
2.02k
  size_t plen;
93
94
2.02k
  p = u8_to_u32 (src, srclen, NULL, &plen);
95
2.02k
  if (p == NULL)
96
0
    {
97
0
      if (errno == ENOMEM)
98
0
  return IDN2_MALLOC;
99
0
      return IDN2_ENCODING_ERROR;
100
0
    }
101
102
2.02k
  if (nfc && !_isNFC (p, plen))
103
0
    {
104
0
      size_t tmplen;
105
0
      uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106
0
      free (p);
107
0
      if (tmp == NULL)
108
0
  {
109
0
    if (errno == ENOMEM)
110
0
      return IDN2_MALLOC;
111
0
    return IDN2_NFC;
112
0
  }
113
114
0
      p = tmp;
115
0
      plen = tmplen;
116
0
    }
117
118
2.02k
  *out = p;
119
2.02k
  *outlen = plen;
120
2.02k
  return IDN2_OK;
121
2.02k
}
122
123
/*
 * Tell whether the SRCLEN bytes starting at SRC are all below 0x80.
 *
 * Returns false as soon as a byte with the high bit set is found and
 * true otherwise.  A zero-length input yields true without reading SRC.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  const uint8_t *cursor = src;
  size_t remaining = srclen;

  while (remaining > 0)
    {
      if ((*cursor & 0x80) != 0)
        return false;

      cursor++;
      remaining--;
    }

  return true;
}
134
135
int
136
_idn2_label_test (int what, const uint32_t *label, size_t llen)
137
2.02k
{
138
2.02k
  if (what & TEST_NFC)
139
2.02k
    {
140
2.02k
      size_t plen;
141
2.02k
      uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142
2.02k
           NULL, &plen);
143
2.02k
      int ok;
144
2.02k
      if (p == NULL)
145
0
  {
146
0
    if (errno == ENOMEM)
147
0
      return IDN2_MALLOC;
148
0
    return IDN2_NFC;
149
0
  }
150
2.02k
      ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151
2.02k
      free (p);
152
2.02k
      if (!ok)
153
393
  return IDN2_NOT_NFC;
154
2.02k
    }
155
156
1.63k
  if (what & TEST_2HYPHEN)
157
1.63k
    {
158
1.63k
      if (llen >= 4 && label[2] == '-' && label[3] == '-')
159
4
  return IDN2_2HYPHEN;
160
1.63k
    }
161
162
1.62k
  if (what & TEST_HYPHEN_STARTEND)
163
1.62k
    {
164
1.62k
      if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165
9
  return IDN2_HYPHEN_STARTEND;
166
1.62k
    }
167
168
1.61k
  if (what & TEST_LEADING_COMBINING)
169
1.61k
    {
170
1.61k
      if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171
29
  return IDN2_LEADING_COMBINING;
172
1.61k
    }
173
174
1.59k
  if (what & TEST_DISALLOWED)
175
1.59k
    {
176
1.59k
      size_t i;
177
9.75k
      for (i = 0; i < llen; i++)
178
8.57k
  if (_idn2_disallowed_p (label[i]))
179
413
    {
180
413
      if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181
0
    (what & TEST_ALLOW_STD3_DISALLOWED))
182
0
        {
183
0
    IDNAMap map;
184
0
    get_idna_map (label[i], &map);
185
0
    if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186
0
        map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187
0
      continue;
188
189
0
        }
190
191
413
      return IDN2_DISALLOWED;
192
413
    }
193
1.59k
    }
194
195
1.17k
  if (what & TEST_CONTEXTJ)
196
0
    {
197
0
      size_t i;
198
0
      for (i = 0; i < llen; i++)
199
0
  if (_idn2_contextj_p (label[i]))
200
0
    return IDN2_CONTEXTJ;
201
0
    }
202
203
1.17k
  if (what & TEST_CONTEXTJ_RULE)
204
1.17k
    {
205
1.17k
      size_t i;
206
1.17k
      int rc;
207
208
8.76k
      for (i = 0; i < llen; i++)
209
7.69k
  {
210
7.69k
    rc = _idn2_contextj_rule (label, llen, i);
211
7.69k
    if (rc != IDN2_OK)
212
111
      return rc;
213
7.69k
  }
214
1.17k
    }
215
216
1.06k
  if (what & TEST_CONTEXTO)
217
0
    {
218
0
      size_t i;
219
0
      for (i = 0; i < llen; i++)
220
0
  if (_idn2_contexto_p (label[i]))
221
0
    return IDN2_CONTEXTO;
222
0
    }
223
224
1.06k
  if (what & TEST_CONTEXTO_WITH_RULE)
225
0
    {
226
0
      size_t i;
227
0
      for (i = 0; i < llen; i++)
228
0
  if (_idn2_contexto_p (label[i])
229
0
      && !_idn2_contexto_with_rule (label[i]))
230
0
    return IDN2_CONTEXTO_NO_RULE;
231
0
    }
232
233
1.06k
  if (what & TEST_CONTEXTO_RULE)
234
1.06k
    {
235
1.06k
      size_t i;
236
1.06k
      int rc;
237
238
7.83k
      for (i = 0; i < llen; i++)
239
6.91k
  {
240
6.91k
    rc = _idn2_contexto_rule (label, llen, i);
241
6.91k
    if (rc != IDN2_OK)
242
147
      return rc;
243
6.91k
  }
244
1.06k
    }
245
246
919
  if (what & TEST_UNASSIGNED)
247
919
    {
248
919
      size_t i;
249
6.65k
      for (i = 0; i < llen; i++)
250
5.88k
  if (_idn2_unassigned_p (label[i]))
251
149
    return IDN2_UNASSIGNED;
252
919
    }
253
254
770
  if (what & TEST_BIDI)
255
770
    {
256
770
      int rc = _idn2_bidi (label, llen);
257
770
      if (rc != IDN2_OK)
258
155
  return rc;
259
770
    }
260
261
615
  if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262
0
    {
263
0
      size_t i;
264
0
      int transitional = what & TEST_TRANSITIONAL;
265
266
      /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267
0
      for (i = 0; i < llen; i++)
268
0
  if (label[i] == 0x002E)
269
0
    return IDN2_DOT_IN_LABEL;
270
271
      /* TR46: 6. Each code point in the label must only have certain status
272
       * values according to Section 5, IDNA Mapping Table:
273
       *    a. For Transitional Processing, each value must be valid.
274
       *    b. For Nontransitional Processing, each value must be either valid or deviation. */
275
0
      for (i = 0; i < llen; i++)
276
0
  {
277
0
    IDNAMap map;
278
279
0
    get_idna_map (label[i], &map);
280
281
0
    if (map_is (&map, TR46_FLG_VALID) ||
282
0
        (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283
0
      continue;
284
285
0
    if (what & TEST_ALLOW_STD3_DISALLOWED &&
286
0
        (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287
0
         map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288
0
      continue;
289
290
0
    return transitional ? IDN2_INVALID_TRANSITIONAL :
291
0
      IDN2_INVALID_NONTRANSITIONAL;
292
0
  }
293
0
    }
294
295
615
  return IDN2_OK;
296
615
}