Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/lwbrk/LineBreaker.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "mozilla/intl/LineBreaker.h"
7
8
#include "jisx4051class.h"
9
#include "nsComplexBreaker.h"
10
#include "nsTArray.h"
11
#include "nsUnicodeProperties.h"
12
#include "mozilla/ArrayUtils.h"
13
14
using namespace mozilla::unicode;
15
using namespace mozilla::intl;
16
17
/*static*/
18
// Factory for LineBreaker: allocates a new instance, holds it in a RefPtr,
// and transfers that reference to the caller via forget().
already_AddRefed<LineBreaker>
LineBreaker::Create()
{
  RefPtr<LineBreaker> breaker = new LineBreaker();
  return breaker.forget();
}
23
24
/*
25
26
   Simplification of Pair Table in JIS X 4051
27
28
   1. The Original Table - in 4.1.3
29
30
   In JIS X 4051, the pair table is defined as below
31
32
   Class of
33
   Leading    Class of Trailing Char Class
34
   Char
35
36
              1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
37
                                                 *  #  *  #
38
        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
39
        2        X  X  X  X  X                                               X
40
        3        X  X  X  X  X                                               X
41
        4        X  X  X  X  X                                               X
42
        5        X  X  X  X  X                                               X
43
        6        X  X  X  X  X                                               X
44
        7        X  X  X  X  X  X                                            X
45
        8        X  X  X  X  X                                X              E
46
        9        X  X  X  X  X                                               X
47
       10        X  X  X  X  X                                               X
48
       11        X  X  X  X  X                                               X
49
       12        X  X  X  X  X                                               X
50
       13        X  X  X  X  X                    X                          X
51
       14        X  X  X  X  X                          X                    X
52
       15        X  X  X  X  X        X                       X        X     X
53
       16        X  X  X  X  X                                   X     X     X
54
       17        X  X  X  X  X                                               E
55
       18        X  X  X  X  X                                X  X     X     X
56
       19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
57
       20        X  X  X  X  X                                               E
58
59
   * Same Char
60
   # Other Char
61
62
   X Cannot Break
63
64
   The classes mean:
65
      1: Open parenthesis
66
      2: Close parenthesis
67
      3: Prohibit a line break before
68
      4: Punctuation for sentence end (except Full stop, e.g., "!" and "?")
69
      5: Middle dot (e.g., U+30FB KATAKANA MIDDLE DOT)
70
      6: Full stop
71
      7: Non-breakable between same characters
72
      8: Prefix (e.g., "$", "NO.")
73
      9: Postfix (e.g., "%")
74
     10: Ideographic space
75
     11: Hiragana
76
     12: Japanese characters (except class 11)
77
     13: Subscript
78
     14: Ruby
79
     15: Numeric
80
     16: Alphabet
81
     17: Space for Western language
82
     18: Western characters (except class 17)
83
     19: Split line note (Warichu) begin quote
84
     20: Split line note (Warichu) end quote
85
86
   2. Simplified by removing the classes which we do not care about
87
88
   However, since we do not care about class 13(Subscript), 14(Ruby),
89
   16 (Alphabet), 19(split line note begin quote), and 20(split line note end
90
   quote), we can simplify this pair table into the following
91
92
   Class of
93
   Leading    Class of Trailing Char Class
94
   Char
95
96
              1  2  3  4  5  6  7  8  9 10 11 12 15 17 18
97
98
        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
99
        2        X  X  X  X  X
100
        3        X  X  X  X  X
101
        4        X  X  X  X  X
102
        5        X  X  X  X  X
103
        6        X  X  X  X  X
104
        7        X  X  X  X  X  X
105
        8        X  X  X  X  X                    X
106
        9        X  X  X  X  X
107
       10        X  X  X  X  X
108
       11        X  X  X  X  X
109
       12        X  X  X  X  X
110
       15        X  X  X  X  X        X           X     X
111
       17        X  X  X  X  X
112
       18        X  X  X  X  X                    X     X
113
114
   3. Simplified by merging classes
115
116
   After simplification 2, the pair table has some duplication
117
   a. classes 2, 3, 4, 5, 6 are the same - we can merge them
118
   b. classes 10, 11, 12, 17 are the same - we can merge them
119
120
121
   Class of
122
   Leading    Class of Trailing Char Class
123
   Char
124
125
              1 [a] 7  8  9 [b]15 18
126
127
        1     X  X  X  X  X  X  X  X
128
      [a]        X
129
        7        X  X
130
        8        X              X
131
        9        X
132
      [b]        X
133
       15        X        X     X  X
134
       18        X              X  X
135
136
137
   4. We add COMPLEX characters and make them breakable with all other classes
138
      except after class 1 and before class [a]
139
140
   Class of
141
   Leading    Class of Trailing Char Class
142
   Char
143
144
              1 [a] 7  8  9 [b]15 18 COMPLEX
145
146
        1     X  X  X  X  X  X  X  X  X
147
      [a]        X
148
        7        X  X
149
        8        X              X
150
        9        X
151
      [b]        X
152
       15        X        X     X  X
153
       18        X              X  X
154
  COMPLEX        X                    T
155
156
     T : need special handling
157
158
159
   5. However, we need two special classes for some punctuation/parentheses,
160
      whose breaking rules are like those of character class (18); see bug 389056.
161
      And also we need character like punctuation that is same behavior with 18,
162
      but the characters are not letters of all languages. (e.g., '_')
163
      [c]. Based on open parenthesis class (1), but it is not breakable after
164
           character class (18) or numeric class (15).
165
      [d]. Based on close parenthesis (or punctuation) class (2), but it is not
166
           breakable before character class (18) or numeric class (15).
167
168
   Class of
169
   Leading    Class of Trailing Char Class
170
   Char
171
172
              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d]
173
174
        1     X  X  X  X  X  X  X  X  X       X    X
175
      [a]        X                            X    X
176
        7        X  X
177
        8        X              X
178
        9        X
179
      [b]        X                                 X
180
       15        X        X     X  X          X    X
181
       18        X              X  X          X    X
182
  COMPLEX        X                    T
183
      [c]     X  X  X  X  X  X  X  X  X       X    X
184
      [d]        X              X  X               X
185
186
187
   6. And Unicode has "NON-BREAK" characters. Lines should not be broken around
188
      them. But JIS X 4051 has no such class; therefore, we create [e].
189
190
   Class of
191
   Leading    Class of Trailing Char Class
192
   Char
193
194
              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
195
196
        1     X  X  X  X  X  X  X  X  X       X    X   X
197
      [a]        X                                 X   X
198
        7        X  X                                  X
199
        8        X              X                      X
200
        9        X                                     X
201
      [b]        X                                 X   X
202
       15        X        X     X  X          X    X   X
203
       18        X              X  X          X    X   X
204
  COMPLEX        X                    T                X
205
      [c]     X  X  X  X  X  X  X  X  X       X    X   X
206
      [d]        X              X  X               X   X
207
      [e]     X  X  X  X  X  X  X  X  X       X    X   X
208
209
210
   7. Now we use one bit to encode whether it is breakable, and use 2 bytes
211
      for one row, then the bit table will look like:
212
213
                 18    <-   1
214
215
       1  0000 1111 1111 1111  = 0x0FFF
216
      [a] 0000 1100 0000 0010  = 0x0C02
217
       7  0000 1000 0000 0110  = 0x0806
218
       8  0000 1000 0100 0010  = 0x0842
219
       9  0000 1000 0000 0010  = 0x0802
220
      [b] 0000 1100 0000 0010  = 0x0C02
221
      15  0000 1110 1101 0010  = 0x0ED2
222
      18  0000 1110 1100 0010  = 0x0EC2
223
 COMPLEX  0000 1001 0000 0010  = 0x0902
224
      [c] 0000 1111 1111 1111  = 0x0FFF
225
      [d] 0000 1100 1100 0010  = 0x0CC2
226
      [e] 0000 1111 1111 1111  = 0x0FFF
227
*/
228
229
// Number of simplified line-breaking classes (rows/columns of the pair tables).
#define MAX_CLASSES 12

// Pair table for the normal breaking rules (the bit table from step 7 of the
// derivation comment). Row index = class number of the leading character;
// bit N of a row = trailing character of class number N. A set bit means
// "cannot break" between the two; GetPair() reports a break opportunity when
// the bit is clear.
static const uint16_t gPair[MAX_CLASSES] = {
  0x0FFF,  // 0: class 1
  0x0C02,  // 1: [a]
  0x0806,  // 2: class 7
  0x0842,  // 3: class 8
  0x0802,  // 4: class 9
  0x0C02,  // 5: [b]
  0x0ED2,  // 6: class 15
  0x0EC2,  // 7: class 18
  0x0902,  // 8: COMPLEX
  0x0FFF,  // 9: [c]
  0x0CC2,  // A: [d]
  0x0FFF   // B: [e]
};
245
246
247
/*
248
249
   8. And if the character is not far enough from the word start, word end and
250
      another break point, we should not break in non-CJK languages.
251
      I.e., Don't break around 15, 18, [c] and [d], but don't change
252
      that if they are related to [b].
253
254
   Class of
255
   Leading    Class of Trailing Char Class
256
   Char
257
258
              1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
259
260
        1     X  X  X  X  X  X  X  X  X       X    X   X
261
      [a]        X              X  X          X    X   X
262
        7        X  X           X  X          X    X   X
263
        8        X              X  X          X    X   X
264
        9        X              X  X          X    X   X
265
      [b]        X                                 X   X
266
       15     X  X  X  X  X     X  X  X       X    X   X
267
       18     X  X  X  X  X     X  X  X       X    X   X
268
  COMPLEX        X              X  X  T       X    X   X
269
      [c]     X  X  X  X  X  X  X  X  X       X    X   X
270
      [d]     X  X  X  X  X     X  X  X       X    X   X
271
      [e]     X  X  X  X  X  X  X  X  X       X    X   X
272
273
                 18    <-   1
274
275
       1  0000 1111 1111 1111  = 0x0FFF
276
      [a] 0000 1110 1100 0010  = 0x0EC2
277
       7  0000 1110 1100 0110  = 0x0EC6
278
       8  0000 1110 1100 0010  = 0x0EC2
279
       9  0000 1110 1100 0010  = 0x0EC2
280
      [b] 0000 1100 0000 0010  = 0x0C02
281
      15  0000 1111 1101 1111  = 0x0FDF
282
      18  0000 1111 1101 1111  = 0x0FDF
283
 COMPLEX  0000 1111 1100 0010  = 0x0FC2
284
      [c] 0000 1111 1111 1111  = 0x0FFF
285
      [d] 0000 1111 1101 1111  = 0x0FDF
286
      [e] 0000 1111 1111 1111  = 0x0FFF
287
*/
288
289
// Pair table for the conservative breaking rules (the bit table from step 8
// of the derivation comment). Same layout as gPair: row index = class number
// of the leading character, bit N = trailing character of class number N,
// and a set bit means "cannot break".
static const uint16_t gPairConservative[MAX_CLASSES] = {
  0x0FFF,  // 0: class 1
  0x0EC2,  // 1: [a]
  0x0EC6,  // 2: class 7
  0x0EC2,  // 3: class 8
  0x0EC2,  // 4: class 9
  0x0C02,  // 5: [b]
  0x0FDF,  // 6: class 15
  0x0FDF,  // 7: class 18
  0x0FC2,  // 8: COMPLEX
  0x0FFF,  // 9: [c]
  0x0FDF,  // A: [d]
  0x0FFF   // B: [e]
};
303
304
305
/*
306
307
   9. Now we map the class to number
308
309
      0: 1
310
      1: [a]- 2, 3, 4, 5, 6
311
      2: 7
312
      3: 8
313
      4: 9
314
      5: [b]- 10, 11, 12, 17
315
      6: 15
316
      7: 18
317
      8: COMPLEX
318
      9: [c]
319
      A: [d]
320
      B: [e]
321
322
    and they mean:
323
      0: Open parenthesis
324
      1: Punctuation that prohibits break before
325
      2: Non-breakable between same classes
326
      3: Prefix
327
      4: Postfix
328
      5: Breakable character (Spaces and Most Japanese characters)
329
      6: Numeric
330
      7: Characters
331
      8: Need special handling characters (E.g., Thai)
332
      9: Open parentheses like Character (See bug 389056)
333
      A: Close parentheses (or punctuation) like Character (See bug 389056)
334
      B: Non breakable (See bug 390920)
335
336
*/
337
338
0
// Sentinel meaning "no class"; deliberately outside the 0x00-0x0B class range.
#define CLASS_NONE                             INT8_MAX

// Simplified line-breaking class numbers. These are the row/column indices
// of gPair and gPairConservative (see the class-number mapping comment).
#define CLASS_OPEN                             0x00
#define CLASS_CLOSE                            0x01
#define CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS 0x02
#define CLASS_PREFIX                           0x03
// NOTE(review): "POSTFFIX" is misspelled; the name is kept unchanged because
// code outside this view may reference it.
#define CLASS_POSTFFIX                         0x04
#define CLASS_BREAKABLE                        0x05
#define CLASS_NUMERIC                          0x06
#define CLASS_CHARACTER                        0x07
#define CLASS_COMPLEX                          0x08
#define CLASS_OPEN_LIKE_CHARACTER              0x09
#define CLASS_CLOSE_LIKE_CHARACTER             0x0A
#define CLASS_NON_BREAKABLE                    0x0B

// Named UTF-16 code units used by the contextual analysis.
#define U_NULL      char16_t(0x0000)
#define U_SLASH     char16_t('/')
#define U_SPACE     char16_t(' ')
#define U_HYPHEN    char16_t('-')
#define U_EQUAL     char16_t('=')
#define U_PERCENT   char16_t('%')
#define U_AMPERSAND char16_t('&')
#define U_SEMICOLON char16_t(';')
#define U_BACKSLASH char16_t('\\')
#define U_OPEN_SINGLE_QUOTE char16_t(0x2018)
#define U_OPEN_DOUBLE_QUOTE char16_t(0x201C)
#define U_OPEN_GUILLEMET    char16_t(0x00AB)

// True for the characters whose class is decided by looking at their
// neighbours: any hyphen accepted by IS_HYPHEN, plus the specific
// punctuation and opening-quote characters listed below.
#define NEED_CONTEXTUAL_ANALYSIS(c) \
  (IS_HYPHEN(c) ||                  \
   (c) == U_SLASH ||                \
   (c) == U_PERCENT ||              \
   (c) == U_AMPERSAND ||            \
   (c) == U_SEMICOLON ||            \
   (c) == U_BACKSLASH ||            \
   (c) == U_OPEN_SINGLE_QUOTE ||    \
   (c) == U_OPEN_DOUBLE_QUOTE ||    \
   (c) == U_OPEN_GUILLEMET)

// True for '0'..'9' (U+0030..U+0039). Note: evaluates its argument twice.
#define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
377
378
// Looks up entry |l| in a packed class table: each 32-bit word of |t| holds
// eight 4-bit entries, lowest nibble first.
static inline int
GETCLASSFROMTABLE(const uint32_t* t, uint16_t l)
{
  const uint32_t word = t[l >> 3];         // eight entries per word
  const unsigned shift = (l & 0x0007) << 2; // four bits per entry
  return int((word >> shift) & 0x000f);
}
383
384
// Returns 1 if |u| lies in U+FF66..U+FF70 (the halfwidth forms this file
// treats as JIS X 4051 class 3), otherwise 0.
static inline int
IS_HALFWIDTH_IN_JISx4051_CLASS3(char16_t u)
{
  if (u < 0xff66) {
    return 0;
  }
  return u <= 0xff70 ? 1 : 0;
}
389
390
// Returns 1 if |u| falls inside one of the code point ranges this file
// treats as CJK, otherwise 0.
static inline int
IS_CJK_CHAR(char32_t u)
{
  struct Range { char32_t first; char32_t last; };
  static const Range kCJKRanges[] = {
    { 0x1100,  0x11ff  },
    { 0x2e80,  0xd7ff  },
    { 0xf900,  0xfaff  },
    { 0xff00,  0xffef  },
    { 0x20000, 0x2fffd },
  };

  for (const Range& range : kCJKRanges) {
    if (range.first <= u && u <= range.last) {
      return 1;
    }
  }
  return 0;
}
399
400
// Returns true if |u| is U+00A0 NO-BREAK SPACE or U+2007 FIGURE SPACE.
//
// The parameter is char32_t rather than char16_t because callers in this
// file pass full code points (ContextState::Init passes the char32_t result
// of GetUnicodeCharAt). With a 16-bit parameter a supplementary-plane value
// such as U+100A0 was implicitly truncated to 0x00A0 and misreported as a
// no-break space. 8-bit and 16-bit arguments still convert losslessly.
static inline bool
IS_NONBREAKABLE_SPACE(char32_t u)
{
  return u == 0x00A0 || u == 0x2007; // NO-BREAK SPACE, FIGURE SPACE
}
405
406
static inline bool
407
IS_HYPHEN(char16_t u)
408
0
{
409
0
  return (u == U_HYPHEN ||
410
0
          u == 0x058A || // ARMENIAN HYPHEN
411
0
          u == 0x2010 || // HYPHEN
412
0
          u == 0x2012 || // FIGURE DASH
413
0
          u == 0x2013);  // EN DASH
414
0
}
415
416
static int8_t
417
GetClass(uint32_t u)
418
0
{
419
0
  if (u < 0x10000) {
420
0
    uint16_t h = u & 0xFF00;
421
0
    uint16_t l = u & 0x00ff;
422
0
423
0
    // Handle 3 range table first
424
0
    if (0x0000 == h) {
425
0
      return GETCLASSFROMTABLE(gLBClass00, l);
426
0
    }
427
0
    if (0x1700 == h) {
428
0
      return GETCLASSFROMTABLE(gLBClass17, l);
429
0
    }
430
0
    if (NS_NeedsPlatformNativeHandling(u)) {
431
0
      return CLASS_COMPLEX;
432
0
    }
433
0
    if (0x0E00 == h) {
434
0
      return GETCLASSFROMTABLE(gLBClass0E, l);
435
0
    }
436
0
    if (0x2000 == h) {
437
0
      return GETCLASSFROMTABLE(gLBClass20, l);
438
0
    }
439
0
    if (0x2100 == h) {
440
0
      return GETCLASSFROMTABLE(gLBClass21, l);
441
0
    }
442
0
    if (0x3000 == h) {
443
0
      return GETCLASSFROMTABLE(gLBClass30, l);
444
0
    }
445
0
    if (0xff00 == h) {
446
0
      if (l < 0x0060) { // Fullwidth ASCII variant
447
0
        return GETCLASSFROMTABLE(gLBClass00, (l+0x20));
448
0
      }
449
0
      if (l < 0x00a0) { // Halfwidth Katakana variants
450
0
        switch (l) {
451
0
        case 0x61: return GetClass(0x3002);
452
0
        case 0x62: return GetClass(0x300c);
453
0
        case 0x63: return GetClass(0x300d);
454
0
        case 0x64: return GetClass(0x3001);
455
0
        case 0x65: return GetClass(0x30fb);
456
0
        case 0x9e: return GetClass(0x309b);
457
0
        case 0x9f: return GetClass(0x309c);
458
0
        default:
459
0
          if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u)) {
460
0
            return CLASS_CLOSE; // jis x4051 class 3
461
0
          }
462
0
          return CLASS_BREAKABLE; // jis x4051 class 11
463
0
        }
464
0
      }
465
0
      if (l < 0x00e0) {
466
0
        return CLASS_CHARACTER; // Halfwidth Hangul variants
467
0
      }
468
0
      if (l < 0x00f0) {
469
0
        static char16_t NarrowFFEx[16] = {
470
0
          0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
471
0
          0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000
472
0
        };
473
0
        return GetClass(NarrowFFEx[l - 0x00e0]);
474
0
      }
475
0
    } else if (0x3100 == h) {
476
0
      if (l <= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
477
0
                       // XXX: This is per UAX #14, but UAX #14 may change
478
0
                       // the line breaking rules about Kanbun and Bopomofo.
479
0
        return CLASS_BREAKABLE;
480
0
      }
481
0
      if (l >= 0xf0) { // Katakana small letters for Ainu
482
0
        return CLASS_CLOSE;
483
0
      }
484
0
    } else if (0x0300 == h) {
485
0
      if (0x4F == l || (0x5C <= l && l <= 0x62)) {
486
0
        return CLASS_NON_BREAKABLE;
487
0
      }
488
0
    } else if (0x0500 == h) {
489
0
      // ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
490
0
      if (l == 0x8A) {
491
0
        return GETCLASSFROMTABLE(gLBClass00, uint16_t(U_HYPHEN));
492
0
      }
493
0
    } else if (0x0F00 == h) {
494
0
      if (0x08 == l || 0x0C == l || 0x12 == l) {
495
0
        return CLASS_NON_BREAKABLE;
496
0
      }
497
0
    } else if (0x1800 == h) {
498
0
      if (0x0E == l) {
499
0
        return CLASS_NON_BREAKABLE;
500
0
      }
501
0
    } else if (0x1600 == h) {
502
0
      if (0x80 == l) { // U+1680 OGHAM SPACE MARK
503
0
        return CLASS_BREAKABLE;
504
0
      }
505
0
    } else if (u == 0xfeff) {
506
0
      return CLASS_NON_BREAKABLE;
507
0
    }
508
0
  }
509
0
510
0
  // Mapping for Unicode LineBreak.txt classes to the (simplified) set of
511
0
  // character classes used here.
512
0
  // XXX The mappings here were derived by comparing the Unicode LineBreak
513
0
  //     values of BMP characters to the classes our existing GetClass returns
514
0
  //     for the same codepoints; in cases where characters with the same
515
0
  //     LineBreak class mapped to various classes here, I picked what seemed
516
0
  //     the most prevalent equivalence.
517
0
  //     Some of these are unclear to me, but currently they are ONLY used
518
0
  //     for characters not handled by the old code above, so all the JISx405
519
0
  //     special cases should already be accounted for.
520
0
  static const int8_t sUnicodeLineBreakToClass[] = {
521
0
    /* UNKNOWN = 0,                       [XX] */ CLASS_CHARACTER,
522
0
    /* AMBIGUOUS = 1,                     [AI] */ CLASS_CHARACTER,
523
0
    /* ALPHABETIC = 2,                    [AL] */ CLASS_CHARACTER,
524
0
    /* BREAK_BOTH = 3,                    [B2] */ CLASS_CHARACTER,
525
0
    /* BREAK_AFTER = 4,                   [BA] */ CLASS_CHARACTER,
526
0
    /* BREAK_BEFORE = 5,                  [BB] */ CLASS_OPEN_LIKE_CHARACTER,
527
0
    /* MANDATORY_BREAK = 6,               [BK] */ CLASS_CHARACTER,
528
0
    /* CONTINGENT_BREAK = 7,              [CB] */ CLASS_CHARACTER,
529
0
    /* CLOSE_PUNCTUATION = 8,             [CL] */ CLASS_CHARACTER,
530
0
    /* COMBINING_MARK = 9,                [CM] */ CLASS_CHARACTER,
531
0
    /* CARRIAGE_RETURN = 10,              [CR] */ CLASS_BREAKABLE,
532
0
    /* EXCLAMATION = 11,                  [EX] */ CLASS_CHARACTER,
533
0
    /* GLUE = 12,                         [GL] */ CLASS_NON_BREAKABLE,
534
0
    /* HYPHEN = 13,                       [HY] */ CLASS_CHARACTER,
535
0
    /* IDEOGRAPHIC = 14,                  [ID] */ CLASS_BREAKABLE,
536
0
    /* INSEPARABLE = 15,                  [IN] */ CLASS_CLOSE_LIKE_CHARACTER,
537
0
    /* INFIX_NUMERIC = 16,                [IS] */ CLASS_CHARACTER,
538
0
    /* LINE_FEED = 17,                    [LF] */ CLASS_BREAKABLE,
539
0
    /* NONSTARTER = 18,                   [NS] */ CLASS_CLOSE_LIKE_CHARACTER,
540
0
    /* NUMERIC = 19,                      [NU] */ CLASS_CHARACTER,
541
0
    /* OPEN_PUNCTUATION = 20,             [OP] */ CLASS_CHARACTER,
542
0
    /* POSTFIX_NUMERIC = 21,              [PO] */ CLASS_CHARACTER,
543
0
    /* PREFIX_NUMERIC = 22,               [PR] */ CLASS_CHARACTER,
544
0
    /* QUOTATION = 23,                    [QU] */ CLASS_CHARACTER,
545
0
    /* COMPLEX_CONTEXT = 24,              [SA] */ CLASS_CHARACTER,
546
0
    /* SURROGATE = 25,                    [SG] */ CLASS_CHARACTER,
547
0
    /* SPACE = 26,                        [SP] */ CLASS_BREAKABLE,
548
0
    /* BREAK_SYMBOLS = 27,                [SY] */ CLASS_CHARACTER,
549
0
    /* ZWSPACE = 28,                      [ZW] */ CLASS_BREAKABLE,
550
0
    /* NEXT_LINE = 29,                    [NL] */ CLASS_CHARACTER,
551
0
    /* WORD_JOINER = 30,                  [WJ] */ CLASS_NON_BREAKABLE,
552
0
    /* H2 = 31,                           [H2] */ CLASS_BREAKABLE,
553
0
    /* H3 = 32,                           [H3] */ CLASS_BREAKABLE,
554
0
    /* JL = 33,                           [JL] */ CLASS_CHARACTER,
555
0
    /* JT = 34,                           [JT] */ CLASS_CHARACTER,
556
0
    /* JV = 35,                           [JV] */ CLASS_CHARACTER,
557
0
    /* CLOSE_PARENTHESIS = 36,            [CP] */ CLASS_CLOSE_LIKE_CHARACTER,
558
0
    /* CONDITIONAL_JAPANESE_STARTER = 37, [CJ] */ CLASS_CLOSE,
559
0
    /* HEBREW_LETTER = 38,                [HL] */ CLASS_CHARACTER,
560
0
    /* REGIONAL_INDICATOR = 39,           [RI] */ CLASS_CHARACTER,
561
0
    /* E_BASE = 40,                       [EB] */ CLASS_BREAKABLE,
562
0
    /* E_MODIFIER = 41,                   [EM] */ CLASS_CHARACTER,
563
0
    /* ZWJ = 42,                          [ZWJ]*/ CLASS_CHARACTER
564
0
  };
565
0
566
0
  static_assert(U_LB_COUNT == mozilla::ArrayLength(sUnicodeLineBreakToClass),
567
0
                "Gecko vs ICU LineBreak class mismatch");
568
0
569
0
  auto cls = mozilla::unicode::GetLineBreakClass(u);
570
0
  MOZ_ASSERT(cls < mozilla::ArrayLength(sUnicodeLineBreakToClass));
571
0
  return sUnicodeLineBreakToClass[cls];
572
0
}
573
574
static bool
575
GetPair(int8_t c1, int8_t c2)
576
0
{
577
0
  NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
578
0
  NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");
579
0
580
0
  return (0 == ((gPair[c1] >> c2) & 0x0001));
581
0
}
582
583
static bool
584
GetPairConservative(int8_t c1, int8_t c2)
585
0
{
586
0
  NS_ASSERTION(c1 < MAX_CLASSES ,"illegal classes 1");
587
0
  NS_ASSERTION(c2 < MAX_CLASSES ,"illegal classes 2");
588
0
589
0
  return (0 == ((gPairConservative[c1] >> c2) & 0x0001));
590
0
}
591
592
class ContextState {
593
public:
594
  ContextState(const char16_t* aText, uint32_t aLength)
595
    : mUniText(aText)
596
    , mText(nullptr)
597
    , mLength(aLength)
598
0
  {
599
0
    Init();
600
0
  }
601
602
  ContextState(const uint8_t* aText, uint32_t aLength)
603
    : mUniText(nullptr)
604
    , mText(aText)
605
    , mLength(aLength)
606
0
  {
607
0
    Init();
608
0
  }
609
610
0
  uint32_t Length() const { return mLength; }
611
0
  uint32_t Index() const { return mIndex; }
612
613
  // This gets a single code unit of the text, without checking for surrogates
614
  // (in the case of a 16-bit text buffer). That's OK if we're only checking for
615
  // specific characters that are known to be BMP values.
616
0
  char16_t GetCodeUnitAt(uint32_t aIndex) const {
617
0
    MOZ_ASSERT(aIndex < mLength, "Out of range!");
618
0
    return mUniText ? mUniText[aIndex] : char16_t(mText[aIndex]);
619
0
  }
620
621
  // This gets a 32-bit Unicode character (codepoint), handling surrogate pairs
622
  // as necessary. It must ONLY be called for 16-bit text, not 8-bit.
623
0
  char32_t GetUnicodeCharAt(uint32_t aIndex) const {
624
0
    MOZ_ASSERT(mUniText, "Only for 16-bit text!");
625
0
    MOZ_ASSERT(aIndex < mLength, "Out of range!");
626
0
    char32_t c = mUniText[aIndex];
627
0
    if (NS_IS_HIGH_SURROGATE(c) && aIndex + 1 < mLength &&
628
0
        NS_IS_LOW_SURROGATE(mUniText[aIndex + 1])) {
629
0
      c = SURROGATE_TO_UCS4(c, mUniText[aIndex + 1]);
630
0
    }
631
0
    return c;
632
0
  }
633
634
0
  void AdvanceIndex() {
635
0
    ++mIndex;
636
0
  }
637
638
0
  void NotifyBreakBefore() { mLastBreakIndex = mIndex; }
639
640
// A word of western language should not be broken. But even if the word has
641
// only ASCII characters, non-natural context words should be broken, e.g.,
642
// URL and file path. For protecting the natural words, we should use
643
// conservative breaking rules at following conditions:
644
//   1. at near the start of word
645
//   2. at near the end of word
646
//   3. at near the latest broken point
647
// CONSERVATIVE_RANGE_{LETTER,OTHER} define the 'near' in characters,
648
// which varies depending whether we are looking at a letter or a non-letter
649
// character: for non-letters, we use an extended "conservative" range.
650
651
0
#define CONSERVATIVE_RANGE_LETTER 2
652
0
#define CONSERVATIVE_RANGE_OTHER  6
653
654
0
  bool UseConservativeBreaking(uint32_t aOffset = 0) const {
655
0
    if (mHasCJKChar)
656
0
      return false;
657
0
    uint32_t index = mIndex + aOffset;
658
0
659
0
    // If the character at index is a letter (rather than various punctuation
660
0
    // characters, etc) then we want a shorter "conservative" range
661
0
    uint32_t conservativeRangeStart, conservativeRangeEnd;
662
0
    if (index < mLength &&
663
0
        nsUGenCategory::kLetter ==
664
0
          (mText ? GetGenCategory(mText[index])
665
0
                 : GetGenCategory(GetUnicodeCharAt(index)))) {
666
0
      // Primarily for hyphenated word prefixes/suffixes; we add 1 to Start
667
0
      // to get more balanced behavior (if we break off a 2-letter prefix,
668
0
      // that means the break will actually be three letters from start of
669
0
      // word, to include the hyphen; whereas a 2-letter suffix will be
670
0
      // broken only two letters from end of word).
671
0
      conservativeRangeEnd = CONSERVATIVE_RANGE_LETTER;
672
0
      conservativeRangeStart = CONSERVATIVE_RANGE_LETTER + 1;
673
0
    } else {
674
0
      conservativeRangeEnd = conservativeRangeStart = CONSERVATIVE_RANGE_OTHER;
675
0
    }
676
0
677
0
    bool result = (index < conservativeRangeStart ||
678
0
                     mLength - index < conservativeRangeEnd ||
679
0
                     index - mLastBreakIndex < conservativeRangeStart);
680
0
    if (result || !mHasNonbreakableSpace)
681
0
      return result;
682
0
683
0
    // This text has no-breakable space, we need to check whether the index
684
0
    // is near it.
685
0
686
0
    // Note that index is always larger than conservativeRange here.
687
0
    for (uint32_t i = index; index - conservativeRangeStart < i; --i) {
688
0
      if (IS_NONBREAKABLE_SPACE(GetCodeUnitAt(i - 1)))
689
0
        return true;
690
0
    }
691
0
    // Note that index is always less than mLength - conservativeRange.
692
0
    for (uint32_t i = index + 1; i < index + conservativeRangeEnd; ++i) {
693
0
      if (IS_NONBREAKABLE_SPACE(GetCodeUnitAt(i)))
694
0
        return true;
695
0
    }
696
0
    return false;
697
0
  }
698
699
0
  bool HasPreviousEqualsSign() const {
700
0
    return mHasPreviousEqualsSign;
701
0
  }
702
0
  void NotifySeenEqualsSign() {
703
0
    mHasPreviousEqualsSign = true;
704
0
  }
705
706
0
  bool HasPreviousSlash() const {
707
0
    return mHasPreviousSlash;
708
0
  }
709
0
  void NotifySeenSlash() {
710
0
    mHasPreviousSlash = true;
711
0
  }
712
713
0
  bool HasPreviousBackslash() const {
714
0
    return mHasPreviousBackslash;
715
0
  }
716
0
  void NotifySeenBackslash() {
717
0
    mHasPreviousBackslash = true;
718
0
  }
719
720
0
  uint32_t GetPreviousNonHyphenCharacter() const {
721
0
    return mPreviousNonHyphenCharacter;
722
0
  }
723
0
  void NotifyNonHyphenCharacter(uint32_t ch) {
724
0
    mPreviousNonHyphenCharacter = ch;
725
0
  }
726
727
private:
728
0
  void Init() {
729
0
    mIndex = 0;
730
0
    mLastBreakIndex = 0;
731
0
    mPreviousNonHyphenCharacter = U_NULL;
732
0
    mHasCJKChar = false;
733
0
    mHasNonbreakableSpace = false;
734
0
    mHasPreviousEqualsSign = false;
735
0
    mHasPreviousSlash = false;
736
0
    mHasPreviousBackslash = false;
737
0
738
0
    if (mText) {
739
0
      // 8-bit text: we only need to check for &nbsp;
740
0
      for (uint32_t i = 0; i < mLength; ++i) {
741
0
        if (IS_NONBREAKABLE_SPACE(mText[i])) {
742
0
          mHasNonbreakableSpace = true;
743
0
          break;
744
0
        }
745
0
      }
746
0
    } else {
747
0
      // 16-bit text: handle surrogates and check for CJK as well as &nbsp;
748
0
      for (uint32_t i = 0; i < mLength; ++i) {
749
0
        char32_t u = GetUnicodeCharAt(i);
750
0
        if (!mHasNonbreakableSpace && IS_NONBREAKABLE_SPACE(u)) {
751
0
          mHasNonbreakableSpace = true;
752
0
          if (mHasCJKChar) {
753
0
            break;
754
0
          }
755
0
        } else if (!mHasCJKChar && IS_CJK_CHAR(u)) {
756
0
          mHasCJKChar = 1;
757
0
          if (mHasNonbreakableSpace) {
758
0
            break;
759
0
          }
760
0
        }
761
0
        if (u > 0xFFFFu) {
762
0
          ++i; // step over trailing low surrogate
763
0
        }
764
0
      }
765
0
    }
766
0
  }
767
768
  const char16_t* const mUniText;
769
  const uint8_t* const mText;
770
771
  uint32_t mIndex;
772
  const uint32_t mLength;         // length of text
773
  uint32_t mLastBreakIndex;
774
  char32_t mPreviousNonHyphenCharacter; // The last character we have seen
775
                                         // which is not U_HYPHEN
776
  bool mHasCJKChar; // if the text has CJK character, this is true.
777
  bool mHasNonbreakableSpace; // if the text has no-breakable space,
778
                                     // this is true.
779
  bool mHasPreviousEqualsSign; // True if we have seen a U_EQUAL
780
  bool mHasPreviousSlash;      // True if we have seen a U_SLASH
781
  bool mHasPreviousBackslash;  // True if we have seen a U_BACKSLASH
782
};
783
784
static int8_t
785
ContextualAnalysis(char32_t prev, char32_t cur, char32_t next,
786
                   ContextState &aState)
787
0
{
788
0
  // Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
789
0
790
0
  if (IS_HYPHEN(cur)) {
791
0
    // If next character is hyphen, we don't need to break between them.
792
0
    if (IS_HYPHEN(next))
793
0
      return CLASS_CHARACTER;
794
0
    // If prev and next characters are numeric, it may be in Math context.
795
0
    // So, we should not break here.
796
0
    bool prevIsNum = IS_ASCII_DIGIT(prev);
797
0
    bool nextIsNum = IS_ASCII_DIGIT(next);
798
0
    if (prevIsNum && nextIsNum)
799
0
      return CLASS_NUMERIC;
800
0
    // If one side is numeric and the other is a character, or if both sides are
801
0
    // characters, the hyphen should be breakable.
802
0
    if (!aState.UseConservativeBreaking(1)) {
803
0
      char32_t prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
804
0
      if (prevOfHyphen && next) {
805
0
        int8_t prevClass = GetClass(prevOfHyphen);
806
0
        int8_t nextClass = GetClass(next);
807
0
        bool prevIsNumOrCharOrClose =
808
0
          prevIsNum ||
809
0
          (prevClass == CLASS_CHARACTER &&
810
0
            !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen)) ||
811
0
          prevClass == CLASS_CLOSE ||
812
0
          prevClass == CLASS_CLOSE_LIKE_CHARACTER;
813
0
        bool nextIsNumOrCharOrOpen =
814
0
          nextIsNum ||
815
0
          (nextClass == CLASS_CHARACTER && !NEED_CONTEXTUAL_ANALYSIS(next)) ||
816
0
          nextClass == CLASS_OPEN ||
817
0
          nextClass == CLASS_OPEN_LIKE_CHARACTER ||
818
0
          next == U_OPEN_SINGLE_QUOTE ||
819
0
          next == U_OPEN_DOUBLE_QUOTE ||
820
0
          next == U_OPEN_GUILLEMET;
821
0
        if (prevIsNumOrCharOrClose && nextIsNumOrCharOrOpen) {
822
0
          return CLASS_CLOSE;
823
0
        }
824
0
      }
825
0
    }
826
0
  } else {
827
0
    aState.NotifyNonHyphenCharacter(cur);
828
0
    if (cur == U_SLASH || cur == U_BACKSLASH) {
829
0
      // If this is immediately after same char, we should not break here.
830
0
      if (prev == cur)
831
0
        return CLASS_CHARACTER;
832
0
      // If this text has two or more (BACK)SLASHs, this may be file path or URL.
833
0
      // Make sure to compute shouldReturn before we notify on this slash.
834
0
      bool shouldReturn = !aState.UseConservativeBreaking() &&
835
0
        (cur == U_SLASH ?
836
0
         aState.HasPreviousSlash() : aState.HasPreviousBackslash());
837
0
838
0
      if (cur == U_SLASH) {
839
0
        aState.NotifySeenSlash();
840
0
      } else {
841
0
        aState.NotifySeenBackslash();
842
0
      }
843
0
844
0
      if (shouldReturn)
845
0
        return CLASS_OPEN;
846
0
    } else if (cur == U_PERCENT) {
847
0
      // If this is a part of the param of URL, we should break before.
848
0
      if (!aState.UseConservativeBreaking()) {
849
0
        if (aState.Index() >= 3 &&
850
0
            aState.GetCodeUnitAt(aState.Index() - 3) == U_PERCENT)
851
0
          return CLASS_OPEN;
852
0
        if (aState.Index() + 3 < aState.Length() &&
853
0
            aState.GetCodeUnitAt(aState.Index() + 3) == U_PERCENT)
854
0
          return CLASS_OPEN;
855
0
      }
856
0
    } else if (cur == U_AMPERSAND || cur == U_SEMICOLON) {
857
0
      // If this may be a separator of params of URL, we should break after.
858
0
      if (!aState.UseConservativeBreaking(1) &&
859
0
          aState.HasPreviousEqualsSign())
860
0
        return CLASS_CLOSE;
861
0
    } else if (cur == U_OPEN_SINGLE_QUOTE ||
862
0
               cur == U_OPEN_DOUBLE_QUOTE ||
863
0
               cur == U_OPEN_GUILLEMET) {
864
0
      // for CJK usage, we treat these as openers to allow a break before them,
865
0
      // but otherwise treat them as normal characters because quote mark usage
866
0
      // in various Western languages varies too much; see bug #450088 discussion.
867
0
      if (!aState.UseConservativeBreaking() && IS_CJK_CHAR(next))
868
0
        return CLASS_OPEN;
869
0
    } else {
870
0
      NS_ERROR("Forgot to handle the current character!");
871
0
    }
872
0
  }
873
0
  return GetClass(cur);
874
0
}
875
876
877
int32_t
878
LineBreaker::WordMove(const char16_t* aText, uint32_t aLen,
879
                      uint32_t aPos, int8_t aDirection)
880
0
{
881
0
  bool    textNeedsJISx4051 = false;
882
0
  int32_t begin, end;
883
0
884
0
  for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
885
0
    if (IS_CJK_CHAR(aText[begin]) || NS_NeedsPlatformNativeHandling(aText[begin])) {
886
0
      textNeedsJISx4051 = true;
887
0
    }
888
0
  }
889
0
  for (end = aPos + 1; end < int32_t(aLen) && !NS_IsSpace(aText[end]); ++end) {
890
0
    if (IS_CJK_CHAR(aText[end]) || NS_NeedsPlatformNativeHandling(aText[end])) {
891
0
      textNeedsJISx4051 = true;
892
0
    }
893
0
  }
894
0
895
0
  int32_t ret;
896
0
  AutoTArray<uint8_t, 2000> breakState;
897
0
  if (!textNeedsJISx4051 || !breakState.AppendElements(end - begin)) {
898
0
    // No complex text character, do not try to do complex line break.
899
0
    // (This is required for serializers. See Bug #344816.)
900
0
    // Also fall back to this when out of memory.
901
0
    if (aDirection < 0) {
902
0
      ret = (begin == int32_t(aPos)) ? begin - 1 : begin;
903
0
    } else {
904
0
      ret = end;
905
0
    }
906
0
  } else {
907
0
    GetJISx4051Breaks(aText + begin, end - begin, LineBreaker::kWordBreak_Normal,
908
0
                      breakState.Elements());
909
0
910
0
    ret = aPos;
911
0
    do {
912
0
      ret += aDirection;
913
0
    } while (begin < ret && ret < end && !breakState[ret - begin]);
914
0
  }
915
0
916
0
  return ret;
917
0
}
918
919
int32_t
920
LineBreaker::Next(const char16_t* aText, uint32_t aLen,
921
                  uint32_t aPos)
922
0
{
923
0
  NS_ASSERTION(aText, "aText shouldn't be null");
924
0
  NS_ASSERTION(aLen > aPos, "Bad position passed to nsJISx4051LineBreaker::Next");
925
0
926
0
  int32_t nextPos = WordMove(aText, aLen, aPos, 1);
927
0
  return nextPos < int32_t(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
928
0
}
929
930
int32_t
931
LineBreaker::Prev(const char16_t* aText, uint32_t aLen,
932
                  uint32_t aPos)
933
0
{
934
0
  NS_ASSERTION(aText, "aText shouldn't be null");
935
0
  NS_ASSERTION(aLen >= aPos && aPos > 0,
936
0
               "Bad position passed to nsJISx4051LineBreaker::Prev");
937
0
938
0
  int32_t prevPos = WordMove(aText, aLen, aPos, -1);
939
0
  return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
940
0
}
941
942
void
943
LineBreaker::GetJISx4051Breaks(const char16_t* aChars, uint32_t aLength,
944
                               uint8_t aWordBreak,
945
                               uint8_t* aBreakBefore)
946
0
{
947
0
  uint32_t cur;
948
0
  int8_t lastClass = CLASS_NONE;
949
0
  ContextState state(aChars, aLength);
950
0
951
0
  for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
952
0
    char32_t ch = state.GetUnicodeCharAt(cur);
953
0
    uint32_t chLen = ch > 0xFFFFu ? 2 : 1;
954
0
    int8_t cl;
955
0
956
0
    if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
957
0
      char32_t prev, next;
958
0
      if (cur > 0) {
959
0
        // not using state.GetUnicodeCharAt() here because we're looking back
960
0
        // rather than forward for possible surrogates
961
0
        prev = aChars[cur - 1];
962
0
        if (NS_IS_LOW_SURROGATE(prev) && cur > 1 &&
963
0
            NS_IS_HIGH_SURROGATE(aChars[cur - 2])) {
964
0
          prev = SURROGATE_TO_UCS4(aChars[cur - 2], prev);
965
0
        }
966
0
      } else {
967
0
        prev = 0;
968
0
      }
969
0
      if (cur + chLen < aLength) {
970
0
        next = state.GetUnicodeCharAt(cur + chLen);
971
0
      } else {
972
0
        next = 0;
973
0
      }
974
0
      cl = ContextualAnalysis(prev, ch, next, state);
975
0
    } else {
976
0
      if (ch == U_EQUAL)
977
0
        state.NotifySeenEqualsSign();
978
0
      state.NotifyNonHyphenCharacter(ch);
979
0
      cl = GetClass(ch);
980
0
    }
981
0
982
0
    bool allowBreak = false;
983
0
    if (cur > 0) {
984
0
      NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
985
0
                   "Loop should have prevented adjacent complex chars here");
986
0
      if (aWordBreak == LineBreaker::kWordBreak_Normal) {
987
0
        allowBreak = (state.UseConservativeBreaking()) ?
988
0
          GetPairConservative(lastClass, cl) : GetPair(lastClass, cl);
989
0
      } else if (aWordBreak == LineBreaker::kWordBreak_BreakAll) {
990
0
        allowBreak = true;
991
0
      }
992
0
    }
993
0
    aBreakBefore[cur] = allowBreak;
994
0
    if (allowBreak)
995
0
      state.NotifyBreakBefore();
996
0
    lastClass = cl;
997
0
    if (CLASS_COMPLEX == cl) {
998
0
      uint32_t end = cur + chLen;
999
0
1000
0
      while (end < aLength) {
1001
0
        char32_t c = state.GetUnicodeCharAt(end);
1002
0
        if (CLASS_COMPLEX != GetClass(c)) {
1003
0
          break;
1004
0
        }
1005
0
        ++end;
1006
0
        if (c > 0xFFFFU) { // it was a surrogate pair
1007
0
          ++end;
1008
0
        }
1009
0
      }
1010
0
1011
0
      NS_GetComplexLineBreaks(aChars + cur, end - cur, aBreakBefore + cur);
1012
0
1013
0
      // We have to consider word-break value again for complex characters
1014
0
      if (aWordBreak != LineBreaker::kWordBreak_Normal) {
1015
0
        // Respect word-break property
1016
0
        for (uint32_t i = cur; i < end; i++)
1017
0
          aBreakBefore[i] = (aWordBreak == LineBreaker::kWordBreak_BreakAll);
1018
0
      }
1019
0
1020
0
      // restore breakability at chunk begin, which was always set to false
1021
0
      // by the complex line breaker
1022
0
      aBreakBefore[cur] = allowBreak;
1023
0
1024
0
      cur = end - 1;
1025
0
    }
1026
0
1027
0
    if (chLen == 2) {
1028
0
      // Supplementary-plane character: mark that we cannot break before the
1029
0
      // trailing low surrogate, and advance past it.
1030
0
      ++cur;
1031
0
      aBreakBefore[cur] = false;
1032
0
      state.AdvanceIndex();
1033
0
    }
1034
0
  }
1035
0
}
1036
1037
void
1038
LineBreaker::GetJISx4051Breaks(const uint8_t* aChars, uint32_t aLength,
1039
                               uint8_t aWordBreak,
1040
                               uint8_t* aBreakBefore)
1041
0
{
1042
0
  uint32_t cur;
1043
0
  int8_t lastClass = CLASS_NONE;
1044
0
  ContextState state(aChars, aLength);
1045
0
1046
0
  for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
1047
0
    char32_t ch = aChars[cur];
1048
0
    int8_t cl;
1049
0
1050
0
    if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
1051
0
      cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
1052
0
                              ch,
1053
0
                              cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
1054
0
                              state);
1055
0
    } else {
1056
0
      if (ch == U_EQUAL)
1057
0
        state.NotifySeenEqualsSign();
1058
0
      state.NotifyNonHyphenCharacter(ch);
1059
0
      cl = GetClass(ch);
1060
0
    }
1061
0
1062
0
    bool allowBreak = false;
1063
0
    if (cur > 0) {
1064
0
      if (aWordBreak == LineBreaker::kWordBreak_Normal) {
1065
0
        allowBreak = (state.UseConservativeBreaking()) ?
1066
0
          GetPairConservative(lastClass, cl) : GetPair(lastClass, cl);
1067
0
      } else if (aWordBreak == LineBreaker::kWordBreak_BreakAll) {
1068
0
        allowBreak = true;
1069
0
      }
1070
0
    }
1071
0
    aBreakBefore[cur] = allowBreak;
1072
0
    if (allowBreak)
1073
0
      state.NotifyBreakBefore();
1074
0
    lastClass = cl;
1075
0
  }
1076
0
}