/src/harfbuzz/src/hb-ot-shaper-khmer.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2011,2012 Google, Inc. |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #include "hb.hh" |
28 | | |
29 | | #ifndef HB_NO_OT_SHAPE |
30 | | |
31 | | #include "hb-ot-shaper-khmer-machine.hh" |
32 | | #include "hb-ot-shaper-indic.hh" |
33 | | #include "hb-ot-layout.hh" |
34 | | |
35 | | |
36 | | /* |
37 | | * Khmer shaper. |
38 | | */ |
39 | | |
40 | | |
41 | | static const hb_ot_map_feature_t |
42 | | khmer_features[] = |
43 | | { |
44 | | /* |
45 | | * Basic features. |
46 | | * These features are applied all at once, before reordering, constrained |
47 | | * to the syllable. |
48 | | */ |
49 | | {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
50 | | {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
51 | | {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
52 | | {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
53 | | {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, |
54 | | /* |
55 | | * Other features. |
56 | | * These features are applied all at once after clearing syllables. |
57 | | */ |
58 | | {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, |
59 | | {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, |
60 | | {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, |
61 | | {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, |
62 | | }; |
63 | | |
64 | | /* |
65 | | * Must be in the same order as the khmer_features array. |
66 | | */ |
67 | | enum { |
68 | | KHMER_PREF, |
69 | | KHMER_BLWF, |
70 | | KHMER_ABVF, |
71 | | KHMER_PSTF, |
72 | | KHMER_CFAR, |
73 | | |
74 | | _KHMER_PRES, |
75 | | _KHMER_ABVS, |
76 | | _KHMER_BLWS, |
77 | | _KHMER_PSTS, |
78 | | |
79 | | KHMER_NUM_FEATURES, |
80 | | KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */ |
81 | | }; |
82 | | |
83 | | static inline void |
84 | | set_khmer_properties (hb_glyph_info_t &info) |
85 | 0 | { |
86 | 0 | hb_codepoint_t u = info.codepoint; |
87 | 0 | unsigned int type = hb_indic_get_categories (u); |
88 | |
|
89 | 0 | info.khmer_category() = (khmer_category_t) (type & 0xFFu); |
90 | 0 | } |
91 | | |
92 | | static bool |
93 | | setup_syllables_khmer (const hb_ot_shape_plan_t *plan, |
94 | | hb_font_t *font, |
95 | | hb_buffer_t *buffer); |
96 | | static bool |
97 | | reorder_khmer (const hb_ot_shape_plan_t *plan, |
98 | | hb_font_t *font, |
99 | | hb_buffer_t *buffer); |
100 | | |
101 | | static void |
102 | | collect_features_khmer (hb_ot_shape_planner_t *plan) |
103 | 0 | { |
104 | 0 | hb_ot_map_builder_t *map = &plan->map; |
105 | | |
106 | | /* Do this before any lookups have been applied. */ |
107 | 0 | map->add_gsub_pause (setup_syllables_khmer); |
108 | 0 | map->add_gsub_pause (reorder_khmer); |
109 | | |
110 | | /* Testing suggests that Uniscribe does NOT pause between basic |
111 | | * features. Test with KhmerUI.ttf and the following three |
112 | | * sequences: |
113 | | * |
114 | | * U+1789,U+17BC |
115 | | * U+1789,U+17D2,U+1789 |
116 | | * U+1789,U+17D2,U+1789,U+17BC |
117 | | * |
118 | | * https://github.com/harfbuzz/harfbuzz/issues/974 |
119 | | */ |
120 | 0 | map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE); |
121 | 0 | map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE); |
122 | |
|
123 | 0 | unsigned int i = 0; |
124 | 0 | for (; i < KHMER_BASIC_FEATURES; i++) |
125 | 0 | map->add_feature (khmer_features[i]); |
126 | | |
127 | | /* https://github.com/harfbuzz/harfbuzz/issues/3531 */ |
128 | 0 | map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var |
129 | |
|
130 | 0 | for (; i < KHMER_NUM_FEATURES; i++) |
131 | 0 | map->add_feature (khmer_features[i]); |
132 | 0 | } |
133 | | |
134 | | static void |
135 | | override_features_khmer (hb_ot_shape_planner_t *plan) |
136 | 0 | { |
137 | 0 | hb_ot_map_builder_t *map = &plan->map; |
138 | | |
139 | | /* Khmer spec has 'clig' as part of required shaping features: |
140 | | * "Apply feature 'clig' to form ligatures that are desired for |
141 | | * typographical correctness.", hence in overrides... */ |
142 | 0 | map->enable_feature (HB_TAG('c','l','i','g')); |
143 | |
|
144 | 0 | map->disable_feature (HB_TAG('l','i','g','a')); |
145 | 0 | } |
146 | | |
147 | | |
148 | | struct khmer_shape_plan_t |
149 | | { |
150 | | hb_mask_t mask_array[KHMER_NUM_FEATURES]; |
151 | | }; |
152 | | |
153 | | static void * |
154 | | data_create_khmer (const hb_ot_shape_plan_t *plan) |
155 | 0 | { |
156 | 0 | khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) hb_calloc (1, sizeof (khmer_shape_plan_t)); |
157 | 0 | if (unlikely (!khmer_plan)) |
158 | 0 | return nullptr; |
159 | | |
160 | 0 | for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++) |
161 | 0 | khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ? |
162 | 0 | 0 : plan->map.get_1_mask (khmer_features[i].tag); |
163 | |
|
164 | 0 | return khmer_plan; |
165 | 0 | } |
166 | | |
167 | | static void |
168 | | data_destroy_khmer (void *data) |
169 | 0 | { |
170 | 0 | hb_free (data); |
171 | 0 | } |
172 | | |
173 | | static void |
174 | | setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
175 | | hb_buffer_t *buffer, |
176 | | hb_font_t *font HB_UNUSED) |
177 | 0 | { |
178 | 0 | HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category); |
179 | | |
180 | | /* We cannot set up masks here. We save information about characters |
181 | | * and set up masks later on in a pause-callback. */ |
182 | |
|
183 | 0 | unsigned int count = buffer->len; |
184 | 0 | hb_glyph_info_t *info = buffer->info; |
185 | 0 | for (unsigned int i = 0; i < count; i++) |
186 | 0 | set_khmer_properties (info[i]); |
187 | 0 | } |
188 | | |
189 | | static bool |
190 | | setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
191 | | hb_font_t *font HB_UNUSED, |
192 | | hb_buffer_t *buffer) |
193 | 0 | { |
194 | 0 | HB_BUFFER_ALLOCATE_VAR (buffer, syllable); |
195 | 0 | find_syllables_khmer (buffer); |
196 | 0 | foreach_syllable (buffer, start, end) |
197 | 0 | buffer->unsafe_to_break (start, end); |
198 | 0 | return false; |
199 | 0 | } |
200 | | |
201 | | |
202 | | /* Rules from: |
203 | | * https://docs.microsoft.com/en-us/typography/script-development/khmer */ |
204 | | |
205 | | static void |
206 | | reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, |
207 | | hb_face_t *face HB_UNUSED, |
208 | | hb_buffer_t *buffer, |
209 | | unsigned int start, unsigned int end) |
210 | 0 | { |
211 | 0 | const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data; |
212 | 0 | hb_glyph_info_t *info = buffer->info; |
213 | | |
214 | | /* Setup masks. */ |
215 | 0 | { |
216 | | /* Post-base */ |
217 | 0 | hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | |
218 | 0 | khmer_plan->mask_array[KHMER_ABVF] | |
219 | 0 | khmer_plan->mask_array[KHMER_PSTF]; |
220 | 0 | for (unsigned int i = start + 1; i < end; i++) |
221 | 0 | info[i].mask |= mask; |
222 | 0 | } |
223 | |
|
224 | 0 | unsigned int num_coengs = 0; |
225 | 0 | for (unsigned int i = start + 1; i < end; i++) |
226 | 0 | { |
227 | | /* """ |
228 | | * When a COENG + (Cons | IndV) combination are found (and subscript count |
229 | | * is less than two) the character combination is handled according to the |
230 | | * subscript type of the character following the COENG. |
231 | | * |
232 | | * ... |
233 | | * |
234 | | * Subscript Type 2 - The COENG + RO characters are reordered to immediately |
235 | | * before the base glyph. Then the COENG + RO characters are assigned to have |
236 | | * the 'pref' OpenType feature applied to them. |
237 | | * """ |
238 | | */ |
239 | 0 | if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end) |
240 | 0 | { |
241 | 0 | num_coengs++; |
242 | |
|
243 | 0 | if (info[i + 1].khmer_category() == K_Cat(Ra)) |
244 | 0 | { |
245 | 0 | for (unsigned int j = 0; j < 2; j++) |
246 | 0 | info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; |
247 | | |
248 | | /* Move the Coeng,Ro sequence to the start. */ |
249 | 0 | buffer->merge_clusters (start, i + 2); |
250 | 0 | hb_glyph_info_t t0 = info[i]; |
251 | 0 | hb_glyph_info_t t1 = info[i + 1]; |
252 | 0 | memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0])); |
253 | 0 | info[start] = t0; |
254 | 0 | info[start + 1] = t1; |
255 | | |
256 | | /* Mark the subsequent stuff with 'cfar'. Used in Khmer. |
257 | | * Read the feature spec. |
258 | | * This allows distinguishing the following cases with MS Khmer fonts: |
259 | | * U+1784,U+17D2,U+179A,U+17D2,U+1782 |
260 | | * U+1784,U+17D2,U+1782,U+17D2,U+179A |
261 | | */ |
262 | 0 | if (khmer_plan->mask_array[KHMER_CFAR]) |
263 | 0 | for (unsigned int j = i + 2; j < end; j++) |
264 | 0 | info[j].mask |= khmer_plan->mask_array[KHMER_CFAR]; |
265 | |
|
266 | 0 | num_coengs = 2; /* Done. */ |
267 | 0 | } |
268 | 0 | } |
269 | | |
270 | | /* Reorder left matra piece. */ |
271 | 0 | else if (info[i].khmer_category() == K_Cat(VPre)) |
272 | 0 | { |
273 | | /* Move to the start. */ |
274 | 0 | buffer->merge_clusters (start, i + 1); |
275 | 0 | hb_glyph_info_t t = info[i]; |
276 | 0 | memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0])); |
277 | 0 | info[start] = t; |
278 | 0 | } |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | | static void |
283 | | reorder_syllable_khmer (const hb_ot_shape_plan_t *plan, |
284 | | hb_face_t *face, |
285 | | hb_buffer_t *buffer, |
286 | | unsigned int start, unsigned int end) |
287 | 0 | { |
288 | 0 | khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F); |
289 | 0 | switch (syllable_type) |
290 | 0 | { |
291 | 0 | case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ |
292 | 0 | case khmer_consonant_syllable: |
293 | 0 | reorder_consonant_syllable (plan, face, buffer, start, end); |
294 | 0 | break; |
295 | | |
296 | 0 | case khmer_non_khmer_cluster: |
297 | 0 | break; |
298 | 0 | } |
299 | 0 | } |
300 | | |
301 | | static bool |
302 | | reorder_khmer (const hb_ot_shape_plan_t *plan, |
303 | | hb_font_t *font, |
304 | | hb_buffer_t *buffer) |
305 | 0 | { |
306 | 0 | bool ret = false; |
307 | 0 | if (buffer->message (font, "start reordering khmer")) |
308 | 0 | { |
309 | 0 | if (hb_syllabic_insert_dotted_circles (font, buffer, |
310 | 0 | khmer_broken_cluster, |
311 | 0 | K_Cat(DOTTEDCIRCLE), |
312 | 0 | (unsigned) -1)) |
313 | 0 | ret = true; |
314 | |
|
315 | 0 | foreach_syllable (buffer, start, end) |
316 | 0 | reorder_syllable_khmer (plan, font->face, buffer, start, end); |
317 | 0 | (void) buffer->message (font, "end reordering khmer"); |
318 | 0 | } |
319 | 0 | HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category); |
320 | |
|
321 | 0 | return ret; |
322 | 0 | } |
323 | | |
324 | | |
325 | | static bool |
326 | | decompose_khmer (const hb_ot_shape_normalize_context_t *c, |
327 | | hb_codepoint_t ab, |
328 | | hb_codepoint_t *a, |
329 | | hb_codepoint_t *b) |
330 | 0 | { |
331 | 0 | switch (ab) |
332 | 0 | { |
333 | | /* |
334 | | * Decompose split matras that don't have Unicode decompositions. |
335 | | */ |
336 | | |
337 | | /* Khmer */ |
338 | 0 | case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; |
339 | 0 | case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; |
340 | 0 | case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; |
341 | 0 | case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; |
342 | 0 | case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; |
343 | 0 | } |
344 | | |
345 | 0 | return (bool) c->unicode->decompose (ab, a, b); |
346 | 0 | } |
347 | | |
348 | | static bool |
349 | | compose_khmer (const hb_ot_shape_normalize_context_t *c, |
350 | | hb_codepoint_t a, |
351 | | hb_codepoint_t b, |
352 | | hb_codepoint_t *ab) |
353 | 0 | { |
354 | | /* Avoid recomposing split matras. */ |
355 | 0 | if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) |
356 | 0 | return false; |
357 | | |
358 | 0 | return (bool) c->unicode->compose (a, b, ab); |
359 | 0 | } |
360 | | |
361 | | |
362 | | const hb_ot_shaper_t _hb_ot_shaper_khmer = |
363 | | { |
364 | | collect_features_khmer, |
365 | | override_features_khmer, |
366 | | data_create_khmer, |
367 | | data_destroy_khmer, |
368 | | nullptr, /* preprocess_text */ |
369 | | nullptr, /* postprocess_glyphs */ |
370 | | decompose_khmer, |
371 | | compose_khmer, |
372 | | setup_masks_khmer, |
373 | | nullptr, /* reorder_marks */ |
374 | | HB_TAG_NONE, /* gpos_tag */ |
375 | | HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
376 | | HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
377 | | false, /* fallback_position */ |
378 | | }; |
379 | | |
380 | | |
381 | | #endif |