/src/harfbuzz/src/hb-ucd.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> |
3 | | * |
4 | | * Permission to use, copy, modify, and/or distribute this software for any |
5 | | * purpose with or without fee is hereby granted, provided that the above |
6 | | * copyright notice and this permission notice appear in all copies. |
7 | | * |
8 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
9 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
11 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
13 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
14 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | | */ |
16 | | |
17 | | #include "hb.hh" |
18 | | #include "hb-unicode.hh" |
19 | | #include "hb-machinery.hh" |
20 | | |
21 | | #include "hb-ucd-table.hh" |
22 | | /* Combining-class callback: returns _hb_ucd_ccc (unicode) cast to hb_unicode_combining_class_t; ufuncs and user_data are unused. */ |
23 | | static hb_unicode_combining_class_t |
24 | | hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
25 | | hb_codepoint_t unicode, |
26 | | void *user_data HB_UNUSED) |
27 | 332k | { |
28 | 332k | return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode); |
29 | 332k | } |
30 | | /* General-category callback: returns _hb_ucd_gc (unicode) cast to hb_unicode_general_category_t; ufuncs and user_data are unused. */ |
31 | | static hb_unicode_general_category_t |
32 | | hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
33 | | hb_codepoint_t unicode, |
34 | | void *user_data HB_UNUSED) |
35 | 20.5M | { |
36 | 20.5M | return (hb_unicode_general_category_t) _hb_ucd_gc (unicode); |
37 | 20.5M | } |
38 | | /* Mirroring callback: returns the input code point plus the value looked up by _hb_ucd_bmg, i.e. the lookup result is used as an offset (presumably 0 when there is no mirror -- confirm against the generated table). */ |
39 | | static hb_codepoint_t |
40 | | hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
41 | | hb_codepoint_t unicode, |
42 | | void *user_data HB_UNUSED) |
43 | 608k | { |
44 | 608k | return unicode + _hb_ucd_bmg (unicode); |
45 | 608k | } |
46 | | /* Script callback: _hb_ucd_sc (unicode) yields an index that is translated to an hb_script_t through _hb_ucd_sc_map; ufuncs and user_data are unused. */ |
47 | | static hb_script_t |
48 | | hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
49 | | hb_codepoint_t unicode, |
50 | | void *user_data HB_UNUSED) |
51 | 3.19M | { |
52 | 3.19M | return _hb_ucd_sc_map[_hb_ucd_sc (unicode)]; |
53 | 3.19M | } |
54 | | |
55 | | /* Constants of the algorithmic Hangul (de)composition used by _hb_ucd_decompose_hangul and _hb_ucd_compose_hangul: each xBASE / xCOUNT pair describes one code point range (S = precomposed syllables; L, V, T = the parts named in the "L,V" and "LV,T" comments). */ |
56 | 19.4M | #define SBASE 0xAC00u |
57 | 653k | #define LBASE 0x1100u |
58 | 252k | #define VBASE 0x1161u |
59 | 272k | #define TBASE 0x11A7u /* TBASE itself is never a valid T: the code requires b > TBASE and a non-zero si % TCOUNT. */ |
60 | 18.9M | #define SCOUNT 11172u |
61 | 155k | #define LCOUNT 19u |
62 | 7.72k | #define VCOUNT 21u |
63 | 110k | #define TCOUNT 28u |
64 | 7.57k | #define NCOUNT (VCOUNT * TCOUNT) /* Number of syllable slots per L value. */ |
65 | | /* Splits a precomposed Hangul syllable "ab" arithmetically into two parts written to *a and *b and returns true; returns false, leaving the outputs untouched, when "ab" is outside [SBASE, SBASE + SCOUNT). */ |
66 | | static inline bool |
67 | | _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) |
68 | 18.7M | { |
69 | 18.7M | unsigned si = ab - SBASE; /* Syllable index; wraps to a huge value when ab < SBASE, so the single test below rejects both sides of the range. */ |
70 | | |
71 | 18.7M | if (si >= SCOUNT) |
72 | 18.7M | return false; |
73 | | |
74 | 27.5k | if (si % TCOUNT) |
75 | 23.7k | { |
76 | | /* LV,T: non-zero trailing index, so split into the LV syllable (si rounded down to a multiple of TCOUNT) and the T part. */ |
77 | 23.7k | *a = SBASE + (si / TCOUNT) * TCOUNT; |
78 | 23.7k | *b = TBASE + (si % TCOUNT); |
79 | 23.7k | return true; |
80 | 23.7k | } else { |
81 | | /* L,V: no trailing part, so split into L (index si / NCOUNT) and V (index (si % NCOUNT) / TCOUNT). */ |
82 | 3.78k | *a = LBASE + (si / NCOUNT); |
83 | 3.78k | *b = VBASE + (si % NCOUNT) / TCOUNT; |
84 | 3.78k | return true; |
85 | 3.78k | } |
86 | 27.5k | } |
87 | | /* Composes the pair (a, b) arithmetically into a Hangul syllable stored in *ab and returns true; returns false without writing *ab when the pair is neither an LV syllable followed by a T nor an L followed by a V. */ |
88 | | static inline bool |
89 | | _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) |
90 | 247k | { |
91 | 247k | if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) && |
92 | 247k | !((a - SBASE) % TCOUNT)) |
93 | 0 | { |
94 | | /* LV,T: "a" is a syllable whose trailing index is 0 and "b" lies strictly above TBASE; add the trailing index to "a". */ |
95 | 0 | *ab = a + (b - TBASE); |
96 | 0 | return true; |
97 | 0 | } |
98 | 247k | else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) |
99 | 0 | { |
100 | | /* L,V: build the syllable from the L index and the V index. */ |
101 | 0 | int li = a - LBASE; |
102 | 0 | int vi = b - VBASE; |
103 | 0 | *ab = SBASE + li * NCOUNT + vi * TCOUNT; |
104 | 0 | return true; |
105 | 0 | } |
106 | 247k | else |
107 | 247k | return false; |
108 | 247k | } |
109 | | /* Comparator passed to hb_bsearch for the 64bit table: compares the 64bit key with the item after masking the item with HB_CODEPOINT_ENCODE3 (0x1FFFFFu, 0x1FFFFFu, 0), which presumably clears the third encoded field so that only the "a,b" pair is compared. Returns -1, +1 or 0. */ |
110 | | static int |
111 | | _cmp_pair (const void *_key, const void *_item) |
112 | 2.06M | { |
113 | 2.06M | uint64_t& a = * (uint64_t*) _key; |
114 | 2.06M | uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0); |
115 | | |
116 | 2.06M | return a < b ? -1 : a > b ? +1 : 0; |
117 | 2.06M | } |
118 | | /* Comparator passed to hb_bsearch for the 32bit table: same scheme as _cmp_pair, but on uint32_t values masked with HB_CODEPOINT_ENCODE3_11_7_14 (0x1FFFFFu, 0x1FFFFFu, 0). Returns -1, +1 or 0. */ static int |
119 | | _cmp_pair_11_7_14 (const void *_key, const void *_item) |
120 | 105k | { |
121 | 105k | uint32_t& a = * (uint32_t*) _key; |
122 | 105k | uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0); |
123 | | |
124 | 105k | return a < b ? -1 : a > b ? +1 : 0; |
125 | 105k | } |
126 | | /* Compose callback: tries to combine "a" and "b" into one code point stored in *ab. Returns true on success; returns false when the pair is not found or its stored result is 0 (in those cases *ab is not written). ufuncs and user_data are unused. */ |
127 | | static hb_bool_t |
128 | | hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
129 | | hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab, |
130 | | void *user_data HB_UNUSED) |
131 | 247k | { |
132 | | // Hangul is handled algorithmically. |
133 | 247k | if (_hb_ucd_compose_hangul (a, b, ab)) return true; |
134 | | |
135 | 247k | hb_codepoint_t u = 0; |
136 | | |
137 | 247k | if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u) |
138 | 11.4k | { |
139 | | /* If "a" fits in 11 bits (a < 0x800) and "b" is in U+0300..U+037F, |
140 | | * the composition data is encoded in a 32bit array sorted |
141 | | * by "a,b" pair; the search key is encoded with a zero third field. */ |
142 | 11.4k | uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0); |
143 | 11.4k | const uint32_t *v = hb_bsearch (k, |
144 | 11.4k | _hb_ucd_dm2_u32_map, |
145 | 11.4k | ARRAY_LENGTH (_hb_ucd_dm2_u32_map), |
146 | 11.4k | sizeof (*_hb_ucd_dm2_u32_map), |
147 | 11.4k | _cmp_pair_11_7_14); |
148 | 11.4k | if (likely (!v)) return false; |
149 | 2.72k | u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v); |
150 | 2.72k | } |
151 | 235k | else |
152 | 235k | { |
153 | | /* Otherwise it is stored in a 64bit array sorted by |
154 | | * "a,b" pair; the search key is again encoded with a zero third field. */ |
155 | 235k | uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0); |
156 | 235k | const uint64_t *v = hb_bsearch (k, |
157 | 235k | _hb_ucd_dm2_u64_map, |
158 | 235k | ARRAY_LENGTH (_hb_ucd_dm2_u64_map), |
159 | 235k | sizeof (*_hb_ucd_dm2_u64_map), |
160 | 235k | _cmp_pair); |
161 | 235k | if (likely (!v)) return false; |
162 | 1.72k | u = HB_CODEPOINT_DECODE3_3 (*v); |
163 | 1.72k | } |
164 | | |
165 | 4.44k | if (unlikely (!u)) return false; /* A decoded value of 0 is treated as "no composition" (presumably entries kept only for decomposition -- confirm against the table generator). */ |
166 | 3.13k | *ab = u; |
167 | 3.13k | return true; |
168 | 4.44k | } |
169 | | /* Decompose callback: splits "ab" into *a and *b (with *b = 0 for single-character decompositions) and returns true, or returns false when there is no decomposition. After the Hangul check, the value from _hb_ucd_dm is used as a 1-based index into the four tables taken in order: dm1_p0, dm1_p2, dm2_u32, dm2_u64. */ |
170 | | static hb_bool_t |
171 | | hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
172 | | hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b, |
173 | | void *user_data HB_UNUSED) |
174 | 18.7M | { |
175 | 18.7M | if (_hb_ucd_decompose_hangul (ab, a, b)) return true; |
176 | | |
177 | 18.7M | unsigned i = _hb_ucd_dm (ab); |
178 | | |
179 | | /* If no data (index 0), there's no decomposition; other values are 1-based, hence the decrement. */ |
180 | 18.7M | if (likely (!i)) return false; |
181 | 478k | i--; |
182 | | |
183 | | /* Check if it's a single-character decomposition. */ |
184 | 478k | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map)) |
185 | 12.6k | { |
186 | | /* Single-character decompositions currently are only in plane 0 or plane 2. */ |
187 | 12.6k | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map)) |
188 | 12.3k | { |
189 | | /* Plane 0: the table value is used as is. */ |
190 | 12.3k | *a = _hb_ucd_dm1_p0_map[i]; |
191 | 12.3k | } |
192 | 267 | else |
193 | 267 | { |
194 | | /* Plane 2: rebase the index onto the second table and OR 0x20000 into the stored value. */ |
195 | 267 | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map); |
196 | 267 | *a = 0x20000 | _hb_ucd_dm1_p2_map[i]; |
197 | 267 | } |
198 | 12.6k | *b = 0; /* No second part for a single-character decomposition. */ |
199 | 12.6k | return true; |
200 | 12.6k | } |
201 | 465k | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map); |
202 | | |
203 | | /* Otherwise they are encoded either in a 32bit array or a 64bit array. */ |
204 | 465k | if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map)) |
205 | 460k | { |
206 | | /* 32bit array: the first and second encoded fields are the two parts. */ |
207 | 460k | uint32_t v = _hb_ucd_dm2_u32_map[i]; |
208 | 460k | *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v); |
209 | 460k | *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v); |
210 | 460k | return true; |
211 | 460k | } |
212 | 5.08k | i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map); |
213 | | |
214 | | /* 64bit array. NOTE(review): no bounds check here; the index from _hb_ucd_dm is trusted to fall inside _hb_ucd_dm2_u64_map. */ |
215 | 5.08k | uint64_t v = _hb_ucd_dm2_u64_map[i]; |
216 | 5.08k | *a = HB_CODEPOINT_DECODE3_1 (v); |
217 | 5.08k | *b = HB_CODEPOINT_DECODE3_2 (v); |
218 | 5.08k | return true; |
219 | 465k | } |
220 | | |
221 | | /* Forward declaration: the loader's create () passes this function to hb_atexit before its definition appears. */ |
222 | | static void free_static_ucd_funcs (); |
223 | | /* Loader type and its single static instance, static_ucd_funcs. create () builds an hb_unicode_funcs_t, installs the six hb_ucd_* callbacks, makes it immutable and registers free_static_ucd_funcs with hb_atexit. When create () is invoked is decided by the hb_unicode_funcs_lazy_loader_t base (presumably on first use -- not visible here). */ |
224 | | static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t> |
225 | | { |
226 | | static hb_unicode_funcs_t *create () |
227 | 1.84k | { |
228 | 1.84k | hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); |
229 | | |
230 | 1.84k | hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr); |
231 | 1.84k | hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr); |
232 | 1.84k | hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr); |
233 | 1.84k | hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr); |
234 | 1.84k | hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr); |
235 | 1.84k | hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr); |
236 | | |
237 | 1.84k | hb_unicode_funcs_make_immutable (funcs); /* No further callback changes are intended after this point. */ |
238 | | |
239 | 1.84k | hb_atexit (free_static_ucd_funcs); /* Schedule release of the instance at process exit. */ |
240 | | |
241 | 1.84k | return funcs; |
242 | 1.84k | } |
243 | | } static_ucd_funcs; |
244 | | /* Exit handler registered through hb_atexit in the loader's create (): releases the instance held by static_ucd_funcs. */ |
245 | | static inline |
246 | | void free_static_ucd_funcs () |
247 | 0 | { |
248 | 0 | static_ucd_funcs.free_instance (); |
249 | 0 | } |
250 | | /* Accessor for the UCD-backed unicode funcs: returns static_ucd_funcs.get_unconst (), or hb_unicode_funcs_get_empty () when HB_NO_UCD is defined. */ |
251 | | hb_unicode_funcs_t * |
252 | | hb_ucd_get_unicode_funcs () |
253 | 1.92M | { |
254 | | #ifdef HB_NO_UCD |
255 | | return hb_unicode_funcs_get_empty (); /* With HB_NO_UCD the return below becomes unreachable. */ |
256 | | #endif |
257 | 1.92M | return static_ucd_funcs.get_unconst (); |
258 | 1.92M | } |