Coverage Report

Created: 2025-11-09 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/lib/context.c
Line
Count
Source
1
/* context.c - check contextual rule on label
2
   Copyright (C) 2011-2025 Simon Josefsson
3
4
   Libidn2 is free software: you can redistribute it and/or modify it
5
   under the terms of either:
6
7
     * the GNU Lesser General Public License as published by the Free
8
       Software Foundation; either version 3 of the License, or (at
9
       your option) any later version.
10
11
   or
12
13
     * the GNU General Public License as published by the Free
14
       Software Foundation; either version 2 of the License, or (at
15
       your option) any later version.
16
17
   or both in parallel, as here.
18
19
   This program is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
   GNU General Public License for more details.
23
24
   You should have received copies of the GNU General Public License and
25
   the GNU Lesser General Public License along with this program.  If
26
   not, see <http://www.gnu.org/licenses/>.
27
*/
28
29
#include <config.h>
30
31
#include "idn2.h"
32
#include "tables.h"
33
#include <unictype.h>   /* uc_combining_class, UC_CCC_VR */
34
#include "context.h"
35
36
int
37
_idn2_contextj_rule (const uint32_t *label, size_t llen, size_t pos)
38
112k
{
39
112k
  uint32_t cp;
40
41
112k
  if (llen == 0)
42
0
    return IDN2_OK;
43
44
112k
  cp = label[pos];
45
46
112k
  if (!_idn2_contextj_p (cp))
47
110k
    return IDN2_OK;
48
49
2.56k
  switch (cp)
50
2.56k
    {
51
2.17k
    case 0x200C:    /* ZERO WIDTH NON-JOINER */
52
2.17k
      if (pos > 0)
53
1.99k
  {
54
    /* If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True; */
55
1.99k
    uint32_t before_cp = label[pos - 1];
56
1.99k
    int cc = uc_combining_class (before_cp);
57
1.99k
    if (cc == UC_CCC_VR)
58
240
      return IDN2_OK;
59
1.99k
  }
60
61
      /* See http://permalink.gmane.org/gmane.ietf.idnabis/6980 for
62
         clarified rule. */
63
64
1.93k
      if (pos == 0 || pos == llen - 1)
65
317
  return IDN2_CONTEXTJ;
66
67
1.62k
      {
68
1.62k
  int jt;
69
1.62k
  size_t tmp;
70
71
  /* Search backwards. */
72
1.62k
  for (tmp = pos - 1;; tmp--)
73
2.12k
    {
74
2.12k
      jt = uc_joining_type (label[tmp]);
75
2.12k
      if (jt == UC_JOINING_TYPE_L || jt == UC_JOINING_TYPE_D)
76
1.25k
        break;
77
869
      if (tmp == 0)
78
292
        return IDN2_CONTEXTJ;
79
577
      if (jt == UC_JOINING_TYPE_T)
80
508
        continue;
81
69
      return IDN2_CONTEXTJ;
82
577
    }
83
84
  /* Search forward. */
85
1.94k
  for (tmp = pos + 1; tmp < llen; tmp++)
86
1.94k
    {
87
1.94k
      jt = uc_joining_type (label[tmp]);
88
1.94k
      if (jt == UC_JOINING_TYPE_R || jt == UC_JOINING_TYPE_D)
89
1.06k
        break;
90
879
      if (tmp == llen - 1)
91
147
        return IDN2_CONTEXTJ;
92
732
      if (jt == UC_JOINING_TYPE_T)
93
687
        continue;
94
45
      return IDN2_CONTEXTJ;
95
732
    }
96
1.25k
      }
97
98
1.06k
      return IDN2_OK;
99
0
      break;
100
101
392
    case 0x200D:    /* ZERO WIDTH JOINER */
102
392
      if (pos > 0)
103
342
  {
104
342
    uint32_t before_cp = label[pos - 1];
105
342
    int cc = uc_combining_class (before_cp);
106
342
    if (cc == UC_CCC_VR)
107
236
      return IDN2_OK;
108
342
  }
109
156
      return IDN2_CONTEXTJ;
110
2.56k
    }
111
112
0
  return IDN2_CONTEXTJ_NO_RULE;
113
2.56k
}
114
115
static const char *
116
_uc_script_name (ucs4_t uc)
117
16.6k
{
118
16.6k
  const uc_script_t *ucs = uc_script (uc);
119
120
16.6k
  if (!ucs)
121
2.05k
    return "";
122
123
14.6k
  return ucs->name;
124
16.6k
}
125
126
int
127
_idn2_contexto_rule (const uint32_t *label, size_t llen, size_t pos)
128
6.38k
{
129
6.38k
  uint32_t cp = label[pos];
130
131
6.38k
  if (!_idn2_contexto_p (cp))
132
3.85k
    return IDN2_OK;
133
134
2.53k
  switch (cp)
135
2.53k
    {
136
81
    case 0x00B7:
137
      /* MIDDLE DOT */
138
81
      if (llen < 3)
139
1
  return IDN2_CONTEXTO;
140
80
      if (pos == 0 || pos == llen - 1)
141
6
  return IDN2_CONTEXTO;
142
74
      if (label[pos - 1] == 0x006C && label[pos + 1] == 0x006C)
143
34
  return IDN2_OK;
144
40
      return IDN2_CONTEXTO;
145
0
      break;
146
147
85
    case 0x0375:
148
      /* GREEK LOWER NUMERAL SIGN (KERAIA) */
149
85
      if (pos == llen - 1)
150
8
  return IDN2_CONTEXTO;
151
77
      if (strcmp (_uc_script_name (label[pos + 1]), "Greek") == 0)
152
68
  return IDN2_OK;
153
9
      return IDN2_CONTEXTO;
154
0
      break;
155
156
74
    case 0x05F3:
157
      /* HEBREW PUNCTUATION GERESH */
158
143
    case 0x05F4:
159
      /* HEBREW PUNCTUATION GERSHAYIM */
160
143
      if (pos == 0)
161
3
  return IDN2_CONTEXTO;
162
140
      if (strcmp (_uc_script_name (label[pos - 1]), "Hebrew") == 0)
163
131
  return IDN2_OK;
164
9
      return IDN2_CONTEXTO;
165
0
      break;
166
167
91
    case 0x0660:
168
162
    case 0x0661:
169
242
    case 0x0662:
170
326
    case 0x0663:
171
416
    case 0x0664:
172
515
    case 0x0665:
173
604
    case 0x0666:
174
699
    case 0x0667:
175
795
    case 0x0668:
176
870
    case 0x0669:
177
870
      {
178
  /* ARABIC-INDIC DIGITS */
179
870
  size_t i;
180
18.2k
  for (i = 0; i < llen; i++)
181
17.4k
    if (label[i] >= 0x6F0 && label[i] <= 0x06F9)
182
5
      return IDN2_CONTEXTO;
183
865
  return IDN2_OK;
184
0
  break;
185
870
      }
186
187
148
    case 0x06F0:
188
275
    case 0x06F1:
189
361
    case 0x06F2:
190
449
    case 0x06F3:
191
540
    case 0x06F4:
192
643
    case 0x06F5:
193
720
    case 0x06F6:
194
813
    case 0x06F7:
195
890
    case 0x06F8:
196
977
    case 0x06F9:
197
977
      {
198
  /* EXTENDED ARABIC-INDIC DIGITS */
199
977
  size_t i;
200
20.8k
  for (i = 0; i < llen; i++)
201
19.8k
    if (label[i] >= 0x660 && label[i] <= 0x0669)
202
4
      return IDN2_CONTEXTO;
203
973
  return IDN2_OK;
204
0
  break;
205
977
      }
206
374
    case 0x30FB:
207
374
      {
208
  /* KATAKANA MIDDLE DOT */
209
374
  size_t i;
210
374
  bool script_ok = false;
211
212
5.95k
  for (i = 0; !script_ok && i < llen; i++)
213
5.58k
    if (strcmp (_uc_script_name (label[i]), "Hiragana") == 0
214
5.51k
        || strcmp (_uc_script_name (label[i]), "Katakana") == 0
215
5.33k
        || strcmp (_uc_script_name (label[i]), "Han") == 0)
216
318
      script_ok = true;
217
218
374
  if (script_ok)
219
318
    return IDN2_OK;
220
56
  return IDN2_CONTEXTO;
221
0
  break;
222
374
      }
223
2.53k
    }
224
225
0
  return IDN2_CONTEXTO_NO_RULE;
226
2.53k
}
227
228
/* Return true when CP is one of the CONTEXTO code points listed in the
   switch below, false for every other code point. */
bool
_idn2_contexto_with_rule (uint32_t cp)
{
  switch (cp)
    {
    case 0x00B7:
      /* MIDDLE DOT */
    case 0x0375:
      /* GREEK LOWER NUMERAL SIGN (KERAIA) */
    case 0x05F3:
      /* HEBREW PUNCTUATION GERESH */
    case 0x05F4:
      /* HEBREW PUNCTUATION GERSHAYIM */
    case 0x0660:
    case 0x0661:
    case 0x0662:
    case 0x0663:
    case 0x0664:
    case 0x0665:
    case 0x0666:
    case 0x0667:
    case 0x0668:
    case 0x0669:
      /* ARABIC-INDIC DIGITS */
    case 0x06F0:
    case 0x06F1:
    case 0x06F2:
    case 0x06F3:
    case 0x06F4:
    case 0x06F5:
    case 0x06F6:
    case 0x06F7:
    case 0x06F8:
    case 0x06F9:
      /* EXTENDED ARABIC-INDIC DIGITS */
    case 0x30FB:
      /* KATAKANA MIDDLE DOT */
      return true;

    default:
      return false;
    }
}