Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/gfx/harfbuzz/src/hb-ot-shape-complex-khmer.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright © 2011,2012  Google, Inc.
3
 *
4
 *  This is part of HarfBuzz, a text shaping library.
5
 *
6
 * Permission is hereby granted, without written agreement and without
7
 * license or royalty fees, to use, copy, modify, and distribute this
8
 * software and its documentation for any purpose, provided that the
9
 * above copyright notice and the following two paragraphs appear in
10
 * all copies of this software.
11
 *
12
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16
 * DAMAGE.
17
 *
18
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23
 *
24
 * Google Author(s): Behdad Esfahbod
25
 */
26
27
#include "hb-ot-shape-complex-khmer.hh"
28
#include "hb-ot-layout.hh"
29
30
31
/*
32
 * Khmer shaper.
33
 */
34
35
struct feature_list_t {
36
  hb_tag_t tag;
37
  hb_ot_map_feature_flags_t flags;
38
};
39
40
/* Feature table of the Khmer shaper.  Entry order is significant: the
 * feature-index enum in this file (PREF, BLWF, ...) indexes this array. */
static const feature_list_t khmer_features[] =
{
  /* Basic features (registered without F_GLOBAL).
   * These features are applied in order, one at a time, after reordering. */
  { HB_TAG('p','r','e','f'), F_NONE },
  { HB_TAG('b','l','w','f'), F_NONE },
  { HB_TAG('a','b','v','f'), F_NONE },
  { HB_TAG('p','s','t','f'), F_NONE },
  { HB_TAG('c','f','a','r'), F_NONE },

  /* Other features (registered with F_GLOBAL).
   * These features are applied all at once. */
  { HB_TAG('p','r','e','s'), F_GLOBAL },
  { HB_TAG('a','b','v','s'), F_GLOBAL },
  { HB_TAG('b','l','w','s'), F_GLOBAL },
  { HB_TAG('p','s','t','s'), F_GLOBAL },

  /* Positioning features, though we don't care about the types. */
  { HB_TAG('d','i','s','t'), F_GLOBAL },
  { HB_TAG('a','b','v','m'), F_GLOBAL },
  { HB_TAG('b','l','w','m'), F_GLOBAL },
};
65
66
/*
67
 * Must be in the same order as the khmer_features array.
68
 */
69
enum {
  /* Basic features: indices 0 .. KHMER_BASIC_FEATURES - 1. */
  PREF  = 0,
  BLWF  = 1,
  ABVF  = 2,
  PSTF  = 3,
  CFAR  = 4,

  /* Remaining features; the underscore-prefixed names are not referenced
   * individually in this file apart from _PRES below. */
  _PRES = 5,
  _ABVS = 6,
  _BLWS = 7,
  _PSTS = 8,
  _DIST = 9,
  _ABVM = 10,
  _BLWM = 11,

  /* Total number of entries; one past the last feature index. */
  KHMER_NUM_FEATURES = 12,

  /* Index of the first non-basic feature.  Don't forget to update this! */
  KHMER_BASIC_FEATURES = _PRES
};
87
88
static void
89
setup_syllables (const hb_ot_shape_plan_t *plan,
90
     hb_font_t *font,
91
     hb_buffer_t *buffer);
92
static void
93
reorder (const hb_ot_shape_plan_t *plan,
94
   hb_font_t *font,
95
   hb_buffer_t *buffer);
96
static void
97
clear_syllables (const hb_ot_shape_plan_t *plan,
98
     hb_font_t *font,
99
     hb_buffer_t *buffer);
100
101
static void
102
collect_features_khmer (hb_ot_shape_planner_t *plan)
103
0
{
104
0
  hb_ot_map_builder_t *map = &plan->map;
105
0
106
0
  /* Do this before any lookups have been applied. */
107
0
  map->add_gsub_pause (setup_syllables);
108
0
  map->add_gsub_pause (reorder);
109
0
110
0
  /* Testing suggests that Uniscribe does NOT pause between basic
111
0
   * features.  Test with KhmerUI.ttf and the following three
112
0
   * sequences:
113
0
   *
114
0
   *   U+1789,U+17BC
115
0
   *   U+1789,U+17D2,U+1789
116
0
   *   U+1789,U+17D2,U+1789,U+17BC
117
0
   *
118
0
   * https://github.com/harfbuzz/harfbuzz/issues/974
119
0
   */
120
0
  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
121
0
  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
122
0
123
0
  unsigned int i = 0;
124
0
  for (; i < KHMER_BASIC_FEATURES; i++) {
125
0
    map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
126
0
  }
127
0
128
0
  map->add_gsub_pause (clear_syllables);
129
0
130
0
  for (; i < KHMER_NUM_FEATURES; i++) {
131
0
    map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
132
0
  }
133
0
134
0
  map->add_global_bool_feature (HB_TAG('c','a','l','t'));
135
0
  map->add_global_bool_feature (HB_TAG('c','l','i','g'));
136
0
137
0
}
138
139
static void
140
override_features_khmer (hb_ot_shape_planner_t *plan)
141
0
{
142
0
  /* Uniscribe does not apply 'kern' in Khmer. */
143
0
  if (hb_options ().uniscribe_bug_compatible)
144
0
  {
145
0
    plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);
146
0
  }
147
0
148
0
  plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
149
0
}
150
151
152
struct would_substitute_feature_t
153
{
154
  inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
155
  {
156
    zero_context = zero_context_;
157
    map->get_stage_lookups (0/*GSUB*/,
158
          map->get_feature_stage (0/*GSUB*/, feature_tag),
159
          &lookups, &count);
160
  }
161
162
  inline bool would_substitute (const hb_codepoint_t *glyphs,
163
        unsigned int          glyphs_count,
164
        hb_face_t            *face) const
165
  {
166
    for (unsigned int i = 0; i < count; i++)
167
      if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
168
  return true;
169
    return false;
170
  }
171
172
  private:
173
  const hb_ot_map_t::lookup_map_t *lookups;
174
  unsigned int count;
175
  bool zero_context;
176
};
177
178
struct khmer_shape_plan_t
179
{
180
  ASSERT_POD ();
181
182
  inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
183
0
  {
184
0
    hb_codepoint_t glyph = virama_glyph;
185
0
    if (unlikely (virama_glyph == (hb_codepoint_t) -1))
186
0
    {
187
0
      if (!font->get_nominal_glyph (0x17D2u, &glyph))
188
0
  glyph = 0;
189
0
      /* Technically speaking, the spec says we should apply 'locl' to virama too.
190
0
       * Maybe one day... */
191
0
192
0
      /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
193
0
       * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
194
0
      virama_glyph = glyph;
195
0
    }
196
0
197
0
    *pglyph = glyph;
198
0
    return glyph != 0;
199
0
  }
200
201
  mutable hb_codepoint_t virama_glyph;
202
203
  would_substitute_feature_t pref;
204
205
  hb_mask_t mask_array[KHMER_NUM_FEATURES];
206
};
207
208
static void *
209
data_create_khmer (const hb_ot_shape_plan_t *plan)
210
0
{
211
0
  khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
212
0
  if (unlikely (!khmer_plan))
213
0
    return nullptr;
214
0
215
0
  khmer_plan->virama_glyph = (hb_codepoint_t) -1;
216
0
217
0
  khmer_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), true);
218
0
219
0
  for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
220
0
    khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
221
0
         0 : plan->map.get_1_mask (khmer_features[i].tag);
222
0
223
0
  return khmer_plan;
224
0
}
225
226
/* Releases the block returned by data_create_khmer(). */
static void
data_destroy_khmer (void *data)
{
  free (data);
}
231
232
233
/* Syllable classification; this file reads it back from the low four bits
 * of a glyph's syllable() value. */
enum syllable_type_t {
  consonant_syllable = 0,
  broken_cluster     = 1,
  non_khmer_cluster  = 2,
};
238
239
#include "hb-ot-shape-complex-khmer-machine.hh"
240
241
static void
242
setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
243
       hb_buffer_t              *buffer,
244
       hb_font_t                *font HB_UNUSED)
245
0
{
246
0
  HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
247
0
  HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
248
0
249
0
  /* We cannot setup masks here.  We save information about characters
250
0
   * and setup masks later on in a pause-callback. */
251
0
252
0
  unsigned int count = buffer->len;
253
0
  hb_glyph_info_t *info = buffer->info;
254
0
  for (unsigned int i = 0; i < count; i++)
255
0
    set_khmer_properties (info[i]);
256
0
}
257
258
static void
259
setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
260
     hb_font_t *font HB_UNUSED,
261
     hb_buffer_t *buffer)
262
0
{
263
0
  find_syllables (buffer);
264
0
  foreach_syllable (buffer, start, end)
265
0
    buffer->unsafe_to_break (start, end);
266
0
}
267
268
269
/* Rules from:
270
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
271
272
static void
273
reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
274
          hb_face_t *face,
275
          hb_buffer_t *buffer,
276
          unsigned int start, unsigned int end)
277
0
{
278
0
  const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
279
0
  hb_glyph_info_t *info = buffer->info;
280
0
281
0
  /* Setup masks. */
282
0
  {
283
0
    /* Post-base */
284
0
    hb_mask_t mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
285
0
    for (unsigned int i = start + 1; i < end; i++)
286
0
      info[i].mask  |= mask;
287
0
  }
288
0
289
0
  unsigned int num_coengs = 0;
290
0
  for (unsigned int i = start + 1; i < end; i++)
291
0
  {
292
0
    /* """
293
0
     * When a COENG + (Cons | IndV) combination are found (and subscript count
294
0
     * is less than two) the character combination is handled according to the
295
0
     * subscript type of the character following the COENG.
296
0
     *
297
0
     * ...
298
0
     *
299
0
     * Subscript Type 2 - The COENG + RO characters are reordered to immediately
300
0
     * before the base glyph. Then the COENG + RO characters are assigned to have
301
0
     * the 'pref' OpenType feature applied to them.
302
0
     * """
303
0
     */
304
0
    if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end)
305
0
    {
306
0
      num_coengs++;
307
0
308
0
      if (info[i + 1].khmer_category() == OT_Ra)
309
0
      {
310
0
  for (unsigned int j = 0; j < 2; j++)
311
0
    info[i + j].mask |= khmer_plan->mask_array[PREF];
312
0
313
0
  /* Move the Coeng,Ro sequence to the start. */
314
0
  buffer->merge_clusters (start, i + 2);
315
0
  hb_glyph_info_t t0 = info[i];
316
0
  hb_glyph_info_t t1 = info[i + 1];
317
0
  memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0]));
318
0
  info[start] = t0;
319
0
  info[start + 1] = t1;
320
0
321
0
  /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
322
0
   * Read the feature spec.
323
0
   * This allows distinguishing the following cases with MS Khmer fonts:
324
0
   * U+1784,U+17D2,U+179A,U+17D2,U+1782
325
0
   * U+1784,U+17D2,U+1782,U+17D2,U+179A
326
0
   */
327
0
  if (khmer_plan->mask_array[CFAR])
328
0
    for (unsigned int j = i + 2; j < end; j++)
329
0
      info[j].mask |= khmer_plan->mask_array[CFAR];
330
0
331
0
  num_coengs = 2; /* Done. */
332
0
      }
333
0
    }
334
0
335
0
    /* Reorder left matra piece. */
336
0
    else if (info[i].khmer_position() == POS_PRE_M)
337
0
    {
338
0
      /* Move to the start. */
339
0
      buffer->merge_clusters (start, i + 1);
340
0
      hb_glyph_info_t t = info[i];
341
0
      memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0]));
342
0
      info[start] = t;
343
0
    }
344
0
  }
345
0
}
346
347
static void
348
initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
349
           hb_face_t *face,
350
           hb_buffer_t *buffer,
351
           unsigned int start, unsigned int end)
352
0
{
353
0
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
354
0
  switch (syllable_type)
355
0
  {
356
0
    case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
357
0
    case consonant_syllable:
358
0
     reorder_consonant_syllable (plan, face, buffer, start, end);
359
0
     break;
360
0
361
0
    case non_khmer_cluster:
362
0
      break;
363
0
  }
364
0
}
365
366
static inline void
367
insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
368
           hb_font_t *font,
369
           hb_buffer_t *buffer)
370
0
{
371
0
  /* Note: This loop is extra overhead, but should not be measurable. */
372
0
  bool has_broken_syllables = false;
373
0
  unsigned int count = buffer->len;
374
0
  hb_glyph_info_t *info = buffer->info;
375
0
  for (unsigned int i = 0; i < count; i++)
376
0
    if ((info[i].syllable() & 0x0F) == broken_cluster)
377
0
    {
378
0
      has_broken_syllables = true;
379
0
      break;
380
0
    }
381
0
  if (likely (!has_broken_syllables))
382
0
    return;
383
0
384
0
385
0
  hb_codepoint_t dottedcircle_glyph;
386
0
  if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
387
0
    return;
388
0
389
0
  hb_glyph_info_t dottedcircle = {0};
390
0
  dottedcircle.codepoint = 0x25CCu;
391
0
  set_khmer_properties (dottedcircle);
392
0
  dottedcircle.codepoint = dottedcircle_glyph;
393
0
394
0
  buffer->clear_output ();
395
0
396
0
  buffer->idx = 0;
397
0
  unsigned int last_syllable = 0;
398
0
  while (buffer->idx < buffer->len && buffer->successful)
399
0
  {
400
0
    unsigned int syllable = buffer->cur().syllable();
401
0
    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
402
0
    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
403
0
    {
404
0
      last_syllable = syllable;
405
0
406
0
      hb_glyph_info_t ginfo = dottedcircle;
407
0
      ginfo.cluster = buffer->cur().cluster;
408
0
      ginfo.mask = buffer->cur().mask;
409
0
      ginfo.syllable() = buffer->cur().syllable();
410
0
      /* TODO Set glyph_props? */
411
0
412
0
      /* Insert dottedcircle after possible Repha. */
413
0
      while (buffer->idx < buffer->len && buffer->successful &&
414
0
       last_syllable == buffer->cur().syllable() &&
415
0
       buffer->cur().khmer_category() == OT_Repha)
416
0
        buffer->next_glyph ();
417
0
418
0
      buffer->output_info (ginfo);
419
0
    }
420
0
    else
421
0
      buffer->next_glyph ();
422
0
  }
423
0
424
0
  buffer->swap_buffers ();
425
0
}
426
427
static void
428
reorder (const hb_ot_shape_plan_t *plan,
429
   hb_font_t *font,
430
   hb_buffer_t *buffer)
431
0
{
432
0
  insert_dotted_circles (plan, font, buffer);
433
0
434
0
  foreach_syllable (buffer, start, end)
435
0
    initial_reordering_syllable (plan, font->face, buffer, start, end);
436
0
437
0
  HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
438
0
  HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
439
0
}
440
441
static void
442
clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
443
     hb_font_t *font HB_UNUSED,
444
     hb_buffer_t *buffer)
445
0
{
446
0
  /* TODO: In USE, we clear syllables right after reorder.  Figure out
447
0
   * what Uniscribe does. */
448
0
  hb_glyph_info_t *info = buffer->info;
449
0
  unsigned int count = buffer->len;
450
0
  for (unsigned int i = 0; i < count; i++)
451
0
    info[i].syllable() = 0;
452
0
}
453
454
455
static bool
456
decompose_khmer (const hb_ot_shape_normalize_context_t *c,
457
     hb_codepoint_t  ab,
458
     hb_codepoint_t *a,
459
     hb_codepoint_t *b)
460
0
{
461
0
  switch (ab)
462
0
  {
463
0
    /*
464
0
     * Decompose split matras that don't have Unicode decompositions.
465
0
     */
466
0
467
0
    /* Khmer */
468
0
    case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
469
0
    case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
470
0
    case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
471
0
    case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
472
0
    case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
473
0
  }
474
0
475
0
  return (bool) c->unicode->decompose (ab, a, b);
476
0
}
477
478
static bool
479
compose_khmer (const hb_ot_shape_normalize_context_t *c,
480
         hb_codepoint_t  a,
481
         hb_codepoint_t  b,
482
         hb_codepoint_t *ab)
483
0
{
484
0
  /* Avoid recomposing split matras. */
485
0
  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
486
0
    return false;
487
0
488
0
  return (bool) c->unicode->compose (a, b, ab);
489
0
}
490
491
492
/* Shaper descriptor for Khmer.  The initializer is positional, so the
 * entries must stay in exactly this order. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
{
  /* Callbacks defined in this file. */
  collect_features_khmer,
  override_features_khmer,
  data_create_khmer,
  data_destroy_khmer,

  /* Hooks this shaper does not provide. */
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */

  /* Normalization mode and its decompose / compose callbacks. */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  decompose_khmer,
  compose_khmer,

  setup_masks_khmer,

  nullptr, /* disable_otl */
  nullptr, /* reorder_marks */

  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  false, /* fallback_position */
};