/src/harfbuzz/src/hb-ot-shaper-khmer.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright © 2011,2012 Google, Inc. |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #include "hb.hh" |
28 | | |
29 | | #ifndef HB_NO_OT_SHAPE |
30 | | |
31 | | #include "hb-ot-shaper-khmer-machine.hh" |
32 | | #include "hb-ot-shaper-indic.hh" |
33 | | #include "hb-ot-layout.hh" |
34 | | |
35 | | |
36 | | /* |
37 | | * Khmer shaper. |
38 | | */ |
39 | | |
40 | | |
41 | | static const hb_ot_map_feature_t |
42 | | khmer_features[] = |
43 | | { |
44 | | /* |
45 | | * Basic features. |
46 | | * These features are applied all at once, before reordering, constrained |
47 | | * to the syllable. |
48 | | */ |
49 | | {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
50 | | {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
51 | | {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
52 | | {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
53 | | {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
54 | | /* |
55 | | * Other features. |
56 | | * These features are applied all at once after clearing syllables. |
57 | | */ |
58 | | {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, |
59 | | {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, |
60 | | {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, |
61 | | {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, |
62 | | }; |
63 | | |
64 | | /* |
65 | | * Must be in the same order as the khmer_features array. |
66 | | */ |
/* Indices into khmer_features[]; keep both lists in the same order. */
enum {
  /* Per-syllable ("basic") features. */
  KHMER_PREF,
  KHMER_BLWF,
  KHMER_ABVF,
  KHMER_PSTF,
  KHMER_CFAR,

  /* Remaining features. */
  _KHMER_PRES,
  _KHMER_ABVS,
  _KHMER_BLWS,
  _KHMER_PSTS,

  /* Total number of entries. */
  KHMER_NUM_FEATURES,

  /* Number of basic features, i.e. the index of the first non-basic one.
   * Must be updated whenever the first non-basic entry changes. */
  KHMER_BASIC_FEATURES = _KHMER_PRES,
};
82 | | |
83 | | static inline void |
84 | | set_khmer_properties (hb_glyph_info_t &info) |
85 | 0 | { |
86 | 0 | hb_codepoint_t u = info.codepoint; |
87 | 0 | unsigned int type = hb_indic_get_categories (u); |
88 | |
|
89 | 0 | info.khmer_category() = (khmer_category_t) (type & 0xFFu); |
90 | 0 | } |
91 | | |
92 | | static bool |
93 | | setup_syllables_khmer (const hb_ot_shape_plan_t *plan, |
94 | | hb_font_t *font, |
95 | | hb_buffer_t *buffer); |
96 | | static bool |
97 | | reorder_khmer (const hb_ot_shape_plan_t *plan, |
98 | | hb_font_t *font, |
99 | | hb_buffer_t *buffer); |
100 | | |
101 | | static void |
102 | | collect_features_khmer (hb_ot_shape_planner_t *plan) |
103 | 0 | { |
104 | 0 | hb_ot_map_builder_t *map = &plan->map; |
105 | | |
106 | | /* Do this before any lookups have been applied. */ |
107 | 0 | map->add_gsub_pause (setup_syllables_khmer); |
108 | 0 | map->add_gsub_pause (reorder_khmer); |
109 | | |
110 | | /* Testing suggests that Uniscribe does NOT pause between basic |
111 | | * features. Test with KhmerUI.ttf and the following three |
112 | | * sequences: |
113 | | * |
114 | | * U+1789,U+17BC |
115 | | * U+1789,U+17D2,U+1789 |
116 | | * U+1789,U+17D2,U+1789,U+17BC |
117 | | * |
118 | | * https://github.com/harfbuzz/harfbuzz/issues/974 |
119 | | */ |
120 | 0 | map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE); |
121 | 0 | map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE); |
122 | |
|
123 | 0 | unsigned int i = 0; |
124 | 0 | for (; i < KHMER_BASIC_FEATURES; i++) |
125 | 0 | map->add_feature (khmer_features[i]); |
126 | | |
127 | | /* https://github.com/harfbuzz/harfbuzz/issues/3531 */ |
128 | 0 | map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var |
129 | |
|
130 | 0 | for (; i < KHMER_NUM_FEATURES; i++) |
131 | 0 | map->add_feature (khmer_features[i]); |
132 | 0 | } |
133 | | |
134 | | static void |
135 | | override_features_khmer (hb_ot_shape_planner_t *plan) |
136 | 0 | { |
137 | 0 | hb_ot_map_builder_t *map = &plan->map; |
138 | | |
139 | | /* Khmer spec has 'clig' as part of required shaping features: |
140 | | * "Apply feature 'clig' to form ligatures that are desired for |
141 | | * typographical correctness.", hence in overrides... */ |
142 | 0 | map->enable_feature (HB_TAG('c','l','i','g')); |
143 | | |
144 | | /* Uniscribe does not apply 'kern' in Khmer. */ |
145 | 0 | if (hb_options ().uniscribe_bug_compatible) |
146 | 0 | { |
147 | 0 | map->disable_feature (HB_TAG('k','e','r','n')); |
148 | 0 | } |
149 | |
|
150 | 0 | map->disable_feature (HB_TAG('l','i','g','a')); |
151 | 0 | } |
152 | | |
153 | | |
154 | | struct khmer_shape_plan_t |
155 | | { |
156 | | hb_mask_t mask_array[KHMER_NUM_FEATURES]; |
157 | | }; |
158 | | |
159 | | static void * |
160 | | data_create_khmer (const hb_ot_shape_plan_t *plan) |
161 | 0 | { |
162 | 0 | khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) hb_calloc (1, sizeof (khmer_shape_plan_t)); |
163 | 0 | if (unlikely (!khmer_plan)) |
164 | 0 | return nullptr; |
165 | | |
166 | 0 | for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++) |
167 | 0 | khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ? |
168 | 0 | 0 : plan->map.get_1_mask (khmer_features[i].tag); |
169 | |
|
170 | 0 | return khmer_plan; |
171 | 0 | } |
172 | | |
173 | | static void |
174 | | data_destroy_khmer (void *data) |
175 | 0 | { |
176 | 0 | hb_free (data); |
177 | 0 | } |
178 | | |
179 | | static void |
180 | | setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
181 | | hb_buffer_t *buffer, |
182 | | hb_font_t *font HB_UNUSED) |
183 | 0 | { |
184 | 0 | HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category); |
185 | | |
186 | | /* We cannot setup masks here. We save information about characters |
187 | | * and setup masks later on in a pause-callback. */ |
188 | |
|
189 | 0 | unsigned int count = buffer->len; |
190 | 0 | hb_glyph_info_t *info = buffer->info; |
191 | 0 | for (unsigned int i = 0; i < count; i++) |
192 | 0 | set_khmer_properties (info[i]); |
193 | 0 | } |
194 | | |
195 | | static bool |
196 | | setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
197 | | hb_font_t *font HB_UNUSED, |
198 | | hb_buffer_t *buffer) |
199 | 0 | { |
200 | 0 | HB_BUFFER_ALLOCATE_VAR (buffer, syllable); |
201 | 0 | find_syllables_khmer (buffer); |
202 | 0 | foreach_syllable (buffer, start, end) |
203 | 0 | buffer->unsafe_to_break (start, end); |
204 | 0 | return false; |
205 | 0 | } |
206 | | |
207 | | |
208 | | /* Rules from: |
209 | | * https://docs.microsoft.com/en-us/typography/script-development/khmer */ |
210 | | |
211 | | static void |
212 | | reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, |
213 | | hb_face_t *face HB_UNUSED, |
214 | | hb_buffer_t *buffer, |
215 | | unsigned int start, unsigned int end) |
216 | 0 | { |
217 | 0 | const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data; |
218 | 0 | hb_glyph_info_t *info = buffer->info; |
219 | | |
220 | | /* Setup masks. */ |
221 | 0 | { |
222 | | /* Post-base */ |
223 | 0 | hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | |
224 | 0 | khmer_plan->mask_array[KHMER_ABVF] | |
225 | 0 | khmer_plan->mask_array[KHMER_PSTF]; |
226 | 0 | for (unsigned int i = start + 1; i < end; i++) |
227 | 0 | info[i].mask |= mask; |
228 | 0 | } |
229 | |
|
230 | 0 | unsigned int num_coengs = 0; |
231 | 0 | for (unsigned int i = start + 1; i < end; i++) |
232 | 0 | { |
233 | | /* """ |
234 | | * When a COENG + (Cons | IndV) combination are found (and subscript count |
235 | | * is less than two) the character combination is handled according to the |
236 | | * subscript type of the character following the COENG. |
237 | | * |
238 | | * ... |
239 | | * |
240 | | * Subscript Type 2 - The COENG + RO characters are reordered to immediately |
241 | | * before the base glyph. Then the COENG + RO characters are assigned to have |
242 | | * the 'pref' OpenType feature applied to them. |
243 | | * """ |
244 | | */ |
245 | 0 | if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end) |
246 | 0 | { |
247 | 0 | num_coengs++; |
248 | |
|
249 | 0 | if (info[i + 1].khmer_category() == K_Cat(Ra)) |
250 | 0 | { |
251 | 0 | for (unsigned int j = 0; j < 2; j++) |
252 | 0 | info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; |
253 | | |
254 | | /* Move the Coeng,Ro sequence to the start. */ |
255 | 0 | buffer->merge_clusters (start, i + 2); |
256 | 0 | hb_glyph_info_t t0 = info[i]; |
257 | 0 | hb_glyph_info_t t1 = info[i + 1]; |
258 | 0 | memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0])); |
259 | 0 | info[start] = t0; |
260 | 0 | info[start + 1] = t1; |
261 | | |
262 | | /* Mark the subsequent stuff with 'cfar'. Used in Khmer. |
263 | | * Read the feature spec. |
264 | | * This allows distinguishing the following cases with MS Khmer fonts: |
265 | | * U+1784,U+17D2,U+179A,U+17D2,U+1782 |
266 | | * U+1784,U+17D2,U+1782,U+17D2,U+179A |
267 | | */ |
268 | 0 | if (khmer_plan->mask_array[KHMER_CFAR]) |
269 | 0 | for (unsigned int j = i + 2; j < end; j++) |
270 | 0 | info[j].mask |= khmer_plan->mask_array[KHMER_CFAR]; |
271 | |
|
272 | 0 | num_coengs = 2; /* Done. */ |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | | /* Reorder left matra piece. */ |
277 | 0 | else if (info[i].khmer_category() == K_Cat(VPre)) |
278 | 0 | { |
279 | | /* Move to the start. */ |
280 | 0 | buffer->merge_clusters (start, i + 1); |
281 | 0 | hb_glyph_info_t t = info[i]; |
282 | 0 | memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0])); |
283 | 0 | info[start] = t; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | static void |
289 | | reorder_syllable_khmer (const hb_ot_shape_plan_t *plan, |
290 | | hb_face_t *face, |
291 | | hb_buffer_t *buffer, |
292 | | unsigned int start, unsigned int end) |
293 | 0 | { |
294 | 0 | khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F); |
295 | 0 | switch (syllable_type) |
296 | 0 | { |
297 | 0 | case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ |
298 | 0 | case khmer_consonant_syllable: |
299 | 0 | reorder_consonant_syllable (plan, face, buffer, start, end); |
300 | 0 | break; |
301 | | |
302 | 0 | case khmer_non_khmer_cluster: |
303 | 0 | break; |
304 | 0 | } |
305 | 0 | } |
306 | | |
307 | | static bool |
308 | | reorder_khmer (const hb_ot_shape_plan_t *plan, |
309 | | hb_font_t *font, |
310 | | hb_buffer_t *buffer) |
311 | 0 | { |
312 | 0 | bool ret = false; |
313 | 0 | if (buffer->message (font, "start reordering khmer")) |
314 | 0 | { |
315 | 0 | if (hb_syllabic_insert_dotted_circles (font, buffer, |
316 | 0 | khmer_broken_cluster, |
317 | 0 | K_Cat(DOTTEDCIRCLE), |
318 | 0 | (unsigned) -1)) |
319 | 0 | ret = true; |
320 | |
|
321 | 0 | foreach_syllable (buffer, start, end) |
322 | 0 | reorder_syllable_khmer (plan, font->face, buffer, start, end); |
323 | 0 | (void) buffer->message (font, "end reordering khmer"); |
324 | 0 | } |
325 | 0 | HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category); |
326 | |
|
327 | 0 | return ret; |
328 | 0 | } |
329 | | |
330 | | |
331 | | static bool |
332 | | decompose_khmer (const hb_ot_shape_normalize_context_t *c, |
333 | | hb_codepoint_t ab, |
334 | | hb_codepoint_t *a, |
335 | | hb_codepoint_t *b) |
336 | 0 | { |
337 | 0 | switch (ab) |
338 | 0 | { |
339 | | /* |
340 | | * Decompose split matras that don't have Unicode decompositions. |
341 | | */ |
342 | | |
343 | | /* Khmer */ |
344 | 0 | case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; |
345 | 0 | case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; |
346 | 0 | case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; |
347 | 0 | case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; |
348 | 0 | case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; |
349 | 0 | } |
350 | | |
351 | 0 | return (bool) c->unicode->decompose (ab, a, b); |
352 | 0 | } |
353 | | |
354 | | static bool |
355 | | compose_khmer (const hb_ot_shape_normalize_context_t *c, |
356 | | hb_codepoint_t a, |
357 | | hb_codepoint_t b, |
358 | | hb_codepoint_t *ab) |
359 | 0 | { |
360 | | /* Avoid recomposing split matras. */ |
361 | 0 | if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) |
362 | 0 | return false; |
363 | | |
364 | 0 | return (bool) c->unicode->compose (a, b, ab); |
365 | 0 | } |
366 | | |
367 | | |
368 | | const hb_ot_shaper_t _hb_ot_shaper_khmer = |
369 | | { |
370 | | collect_features_khmer, |
371 | | override_features_khmer, |
372 | | data_create_khmer, |
373 | | data_destroy_khmer, |
374 | | nullptr, /* preprocess_text */ |
375 | | nullptr, /* postprocess_glyphs */ |
376 | | decompose_khmer, |
377 | | compose_khmer, |
378 | | setup_masks_khmer, |
379 | | nullptr, /* reorder_marks */ |
380 | | HB_TAG_NONE, /* gpos_tag */ |
381 | | HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
382 | | HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
383 | | false, /* fallback_position */ |
384 | | }; |
385 | | |
386 | | |
387 | | #endif |