Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/harfbuzz/src/hb-ucd.cc
Line
Count
Source
1
/*
2
 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
3
 *
4
 * Permission to use, copy, modify, and/or distribute this software for any
5
 * purpose with or without fee is hereby granted, provided that the above
6
 * copyright notice and this permission notice appear in all copies.
7
 *
8
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
 */
16
17
#include "hb.hh"
18
#include "hb-unicode.hh"
19
#include "hb-machinery.hh"
20
21
#include "hb-ucd-table.hh"
22
23
static hb_unicode_combining_class_t
24
hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
25
      hb_codepoint_t unicode,
26
      void *user_data HB_UNUSED)
27
2.17M
{
28
2.17M
  return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
29
2.17M
}
30
31
static hb_unicode_general_category_t
32
hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
33
       hb_codepoint_t unicode,
34
       void *user_data HB_UNUSED)
35
244M
{
36
244M
  return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
37
244M
}
38
39
static hb_codepoint_t
40
hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
41
      hb_codepoint_t unicode,
42
      void *user_data HB_UNUSED)
43
49.2M
{
44
49.2M
  return unicode + _hb_ucd_bmg (unicode);
45
49.2M
}
46
47
static hb_script_t
48
hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
49
         hb_codepoint_t unicode,
50
         void *user_data HB_UNUSED)
51
0
{
52
0
  return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
53
0
}
54
55
56
35.7M
#define SBASE 0xAC00u
57
2.62M
#define LBASE 0x1100u
58
1.20M
#define VBASE 0x1161u
59
2.75M
#define TBASE 0x11A7u
60
31.8M
#define SCOUNT 11172u
61
323k
#define LCOUNT 19u
62
205k
#define VCOUNT 21u
63
7.02M
#define TCOUNT 28u
64
205k
#define NCOUNT (VCOUNT * TCOUNT)
65
66
static inline bool
67
_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
68
31.8M
{
69
31.8M
  unsigned si = ab - SBASE;
70
71
31.8M
  if (si >= SCOUNT)
72
30.0M
    return false;
73
74
1.75M
  if (si % TCOUNT)
75
1.65M
  {
76
    /* LV,T */
77
1.65M
    *a = SBASE + (si / TCOUNT) * TCOUNT;
78
1.65M
    *b = TBASE + (si % TCOUNT);
79
1.65M
    return true;
80
1.65M
  } else {
81
    /* L,V */
82
102k
    *a = LBASE + (si / NCOUNT);
83
102k
    *b = VBASE + (si % NCOUNT) / TCOUNT;
84
102k
    return true;
85
102k
  }
86
1.75M
}
87
88
static inline bool
89
_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
90
1.10M
{
91
1.10M
  if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
92
0
    !((a - SBASE) % TCOUNT))
93
0
  {
94
    /* LV,T */
95
0
    *ab = a + (b - TBASE);
96
0
    return true;
97
0
  }
98
1.10M
  else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
99
0
  {
100
    /* L,V */
101
0
    int li = a - LBASE;
102
0
    int vi = b - VBASE;
103
0
    *ab = SBASE + li * NCOUNT + vi * TCOUNT;
104
0
    return true;
105
0
  }
106
1.10M
  else
107
1.10M
    return false;
108
1.10M
}
109
110
static int
111
_cmp_pair (const void *_key, const void *_item)
112
8.54M
{
113
8.54M
  uint64_t& a = * (uint64_t*) _key;
114
8.54M
  uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
115
116
8.54M
  return a < b ? -1 : a > b ? +1 : 0;
117
8.54M
}
118
static int
119
_cmp_pair_11_7_14 (const void *_key, const void *_item)
120
1.02M
{
121
1.02M
  uint32_t& a = * (uint32_t*) _key;
122
1.02M
  uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
123
124
1.02M
  return a < b ? -1 : a > b ? +1 : 0;
125
1.02M
}
126
127
static hb_bool_t
128
hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
129
    hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
130
    void *user_data HB_UNUSED)
131
1.10M
{
132
  // Hangul is handled algorithmically.
133
1.10M
  if (_hb_ucd_compose_hangul (a, b, ab)) return true;
134
135
1.10M
  hb_codepoint_t u = 0;
136
137
1.10M
  if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
138
112k
  {
139
    /* If "a" is small enough and "b" is in the U+0300 range,
140
     * the composition data is encoded in a 32bit array sorted
141
     * by "a,b" pair. */
142
112k
    uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
143
112k
    const uint32_t *v = hb_bsearch (k,
144
112k
            _hb_ucd_dm2_u32_map,
145
112k
            ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
146
112k
            sizeof (*_hb_ucd_dm2_u32_map),
147
112k
            _cmp_pair_11_7_14);
148
112k
    if (likely (!v)) return false;
149
39.8k
    u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
150
39.8k
  }
151
989k
  else
152
989k
  {
153
    /* Otherwise it is stored in a 64bit array sorted by
154
     * "a,b" pair. */
155
989k
    uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
156
989k
    const uint64_t *v = hb_bsearch (k,
157
989k
            _hb_ucd_dm2_u64_map,
158
989k
            ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
159
989k
            sizeof (*_hb_ucd_dm2_u64_map),
160
989k
            _cmp_pair);
161
989k
    if (likely (!v)) return false;
162
24.8k
    u = HB_CODEPOINT_DECODE3_3 (*v);
163
24.8k
  }
164
165
64.7k
  if (unlikely (!u)) return false;
166
40.7k
  *ab = u;
167
40.7k
  return true;
168
64.7k
}
169
170
static hb_bool_t
171
hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
172
      hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
173
      void *user_data HB_UNUSED)
174
31.8M
{
175
31.8M
  if (_hb_ucd_decompose_hangul (ab, a, b)) return true;
176
177
30.0M
  unsigned i = _hb_ucd_dm (ab);
178
179
  /* If no data, there's no decomposition. */
180
30.0M
  if (likely (!i)) return false;
181
184k
  i--;
182
183
  /* Check if it's a single-character decomposition. */
184
184k
  if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
185
82.6k
  {
186
    /* Single-character decompositions currently are only in plane 0 or plane 2. */
187
82.6k
    if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
188
81.2k
    {
189
      /* Plane 0. */
190
81.2k
      *a = _hb_ucd_dm1_p0_map[i];
191
81.2k
    }
192
1.32k
    else
193
1.32k
    {
194
      /* Plane 2. */
195
1.32k
      i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
196
1.32k
      *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
197
1.32k
    }
198
82.6k
    *b = 0;
199
82.6k
    return true;
200
82.6k
  }
201
102k
  i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
202
203
  /* Otherwise they are encoded either in a 32bit array or a 64bit array. */
204
102k
  if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
205
46.0k
  {
206
    /* 32bit array. */
207
46.0k
    uint32_t v = _hb_ucd_dm2_u32_map[i];
208
46.0k
    *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
209
46.0k
    *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
210
46.0k
    return true;
211
46.0k
  }
212
56.1k
  i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
213
214
  /* 64bit array. */
215
56.1k
  uint64_t v = _hb_ucd_dm2_u64_map[i];
216
56.1k
  *a = HB_CODEPOINT_DECODE3_1 (v);
217
56.1k
  *b = HB_CODEPOINT_DECODE3_2 (v);
218
56.1k
  return true;
219
102k
}
220
221
222
static void free_static_ucd_funcs ();
223
224
static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
225
{
226
  static hb_unicode_funcs_t *create ()
227
29
  {
228
29
    hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
229
230
29
    hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
231
29
    hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
232
29
    hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
233
29
    hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
234
29
    hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
235
29
    hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
236
237
29
    hb_unicode_funcs_make_immutable (funcs);
238
239
29
    hb_atexit (free_static_ucd_funcs);
240
241
29
    return funcs;
242
29
  }
243
} static_ucd_funcs;
244
245
static inline
246
void free_static_ucd_funcs ()
247
29
{
248
29
  static_ucd_funcs.free_instance ();
249
29
}
250
251
hb_unicode_funcs_t *
252
hb_ucd_get_unicode_funcs ()
253
11.5M
{
254
#ifdef HB_NO_UCD
255
  return hb_unicode_funcs_get_empty ();
256
#endif
257
11.5M
  return static_ucd_funcs.get_unconst ();
258
11.5M
}