Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/base/nsLineBreaker.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "nsLineBreaker.h"
8
#include "nsContentUtils.h"
9
#include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10
#include "nsHyphenationManager.h"
11
#include "nsHyphenator.h"
12
#include "mozilla/gfx/2D.h"
13
#include "mozilla/intl/LineBreaker.h"
14
15
using mozilla::intl::LineBreaker;
16
17
// Builds a line breaker with no pending word: no word language recorded,
// every per-word and per-position flag cleared, and the word-break mode set
// to LineBreaker::kWordBreak_Normal.
nsLineBreaker::nsLineBreaker()
  : mCurrentWordLanguage(nullptr)
  , mCurrentWordContainsMixedLang(false)
  , mCurrentWordContainsComplexChar(false)
  , mAfterBreakableSpace(false)
  , mBreakHere(false)
  , mWordBreak(LineBreaker::kWordBreak_Normal)
{
}
25
26
// Nothing is released explicitly here; the destructor only checks that no
// word is still buffered, i.e. that the pending word was flushed beforehand.
nsLineBreaker::~nsLineBreaker()
{
  NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
}
30
31
static void
32
SetupCapitalization(const char16_t* aWord, uint32_t aLength,
33
                    bool* aCapitalization)
34
0
{
35
0
  // Capitalize the first alphanumeric character after a space or start
36
0
  // of the word.
37
0
  // The only space character a word can contain is NBSP.
38
0
  bool capitalizeNextChar = true;
39
0
  for (uint32_t i = 0; i < aLength; ++i) {
40
0
    uint32_t ch = aWord[i];
41
0
    if (capitalizeNextChar) {
42
0
      if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
43
0
          NS_IS_LOW_SURROGATE(aWord[i + 1])) {
44
0
        ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
45
0
      }
46
0
      if (nsContentUtils::IsAlphanumeric(ch)) {
47
0
        aCapitalization[i] = true;
48
0
        capitalizeNextChar = false;
49
0
      }
50
0
      if (!IS_IN_BMP(ch)) {
51
0
        ++i;
52
0
      }
53
0
    }
54
0
    if (ch == 0xA0 /*NBSP*/) {
55
0
      capitalizeNextChar = true;
56
0
    }
57
0
  }
58
0
}
59
60
// Finishes the word buffered in mCurrentWord: computes a break value for
// every character of the word, optionally adds hyphenation points, and then
// hands each contributing text item's slice of the results to that item's
// sink. Afterwards the buffered word, its text items and the per-word state
// are cleared.
//
// Returns NS_OK on success, or NS_ERROR_OUT_OF_MEMORY if a scratch array
// cannot be grown (in which case the buffered word is left in place).
nsresult
nsLineBreaker::FlushCurrentWord()
{
  const uint32_t wordLength = mCurrentWord.Length();

  AutoTArray<uint8_t,4000> breaks;
  if (!breaks.AppendElements(wordLength)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Only populated if some text item asks for capitalization.
  nsTArray<bool> capitalization;

  if (mCurrentWordContainsComplexChar) {
    nsContentUtils::LineBreaker()->
      GetJISx4051Breaks(mCurrentWord.Elements(), wordLength, mWordBreak,
                        breaks.Elements());
  } else {
    // Without complex characters the whole word gets one uniform value:
    // "break" everywhere under break-all, "no break" otherwise.
    const uint8_t uniformBreak =
      mWordBreak == LineBreaker::kWordBreak_BreakAll
        ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
        : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    memset(breaks.Elements(), uniformBreak, wordLength*sizeof(uint8_t));
  }

  // Hyphenate only when the word has a single known language and every
  // contributing text item requested automatic hyphenation.
  bool hyphenationAllowed = mCurrentWordLanguage &&
    !mCurrentWordContainsMixedLang;
  uint32_t i;
  for (i = 0; hyphenationAllowed && i < mTextItems.Length(); ++i) {
    hyphenationAllowed =
      (mTextItems[i].mFlags & BREAK_USE_AUTO_HYPHENATION) != 0;
  }
  if (hyphenationAllowed) {
    RefPtr<nsHyphenator> hyphenator =
      nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
    if (hyphenator) {
      const char16_t* wordBegin = mCurrentWord.Elements();
      FindHyphenationPoints(hyphenator,
                            wordBegin,
                            wordBegin + wordLength,
                            breaks.Elements());
    }
  }

  // Walk the text items in order; wordPos is where the current item's
  // characters start inside the buffered word.
  uint32_t wordPos = 0;
  for (i = 0; i < mTextItems.Length(); ++i) {
    TextItem* ti = &mTextItems[i];
    NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");

    const bool startsAtSinkOrigin = ti->mSinkOffset == 0;

    if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && startsAtSinkOrigin) {
      breaks[wordPos] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
      // The item's first character is left alone when the item begins at
      // sink offset 0; everything after it is forced to "no break".
      const uint32_t untouched = startsAtSinkOrigin ? 1 : 0;
      memset(breaks.Elements() + wordPos + untouched,
             gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
             (ti->mLength - untouched)*sizeof(uint8_t));
    }

    if (ti->mSink) {
      // The break state of the word's very first character was already
      // written out earlier, and its true value is not known here, so it is
      // not sent again.
      const uint32_t alreadySent = (i == 0) ? 1 : 0;
      ti->mSink->SetBreaks(ti->mSinkOffset + alreadySent,
                           ti->mLength - alreadySent,
                           breaks.Elements() + wordPos + alreadySent);

      if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
        if (capitalization.Length() == 0) {
          // First request: compute capitalization for the whole word once.
          if (!capitalization.AppendElements(wordLength)) {
            return NS_ERROR_OUT_OF_MEMORY;
          }
          memset(capitalization.Elements(), false, wordLength*sizeof(bool));
          SetupCapitalization(mCurrentWord.Elements(), wordLength,
                              capitalization.Elements());
        }
        ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
                                     capitalization.Elements() + wordPos);
      }
    }

    wordPos += ti->mLength;
  }

  // Return to the "no pending word" state.
  mCurrentWord.Clear();
  mTextItems.Clear();
  mCurrentWordContainsComplexChar = false;
  mCurrentWordContainsMixedLang = false;
  mCurrentWordLanguage = nullptr;
  return NS_OK;
}
150
151
// Flag combination tested by AppendText: when aFlags carries every one of
// these bits, there is no need to look for break opportunities inside the
// appended text.
#define NO_BREAKS_NEEDED_FLAGS \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
   BREAK_SKIP_SETTING_NO_BREAKS)
157
158
/**
 * Feeds a run of UTF-16 text to the line breaker.
 *
 * A word left pending by an earlier call is continued first; if whitespace
 * is found in aText, that pending word is flushed with FlushCurrentWord().
 * The rest of the text is scanned word by word: break values (and, when
 * BREAK_NEED_CAPITALIZATION is set, capitalization flags) are computed and
 * reported to aSink, and a trailing word that is not yet terminated by
 * whitespace is copied into mCurrentWord so a later call can extend it.
 *
 * @param aHyphenationLanguage language used to look up a hyphenator when
 *                             BREAK_USE_AUTO_HYPHENATION is set; may be null.
 * @param aText   the UTF-16 code units to append.
 * @param aLength number of code units in aText. Zero is asserted against
 *                and otherwise ignored (NS_OK is returned, nothing changes).
 * @param aFlags  combination of BREAK_* flags.
 * @param aSink   receives SetBreaks()/SetCapitalization(); may be null, in
 *                which case the text only serves as context.
 * @return NS_OK, NS_ERROR_OUT_OF_MEMORY, or the failure code returned by
 *         FlushCurrentWord().
 */
nsresult
nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
                          uint32_t aFlags, nsILineBreakSink* aSink)
{
  NS_ASSERTION(aLength > 0, "Appending empty text...");
  if (aLength == 0) {
    // Nothing to scan. The loop further down reads aText[offset] and indexes
    // breakState[offset] before it compares offset against aLength, so empty
    // input must not reach it.
    return NS_OK;
  }

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");

    while (offset < aLength && !IsSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
        mCurrentWordContainsComplexChar = true;
      }
      UpdateCurrentWordLanguage(aHyphenationLanguage);
      ++offset;
    }

    if (offset > 0) {
      // Record which part of this sink's text belongs to the pending word.
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    if (offset == aLength) {
      // No whitespace seen: the word is still unfinished.
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv))
      return rv;
  }

  // Per-character break values for aText; only allocated if there is a sink
  // to report them to.
  AutoTArray<uint8_t,4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
  }

  bool noCapitalizationNeeded = true;
  nsTArray<bool> capitalizationState;
  if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
    if (!capitalizationState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
    memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
    noCapitalizationNeeded = false;
  }

  // First character whose state is reported to the sink by this call.
  uint32_t start = offset;
  bool noBreaksNeeded = !aSink ||
    ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
     !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded && noCapitalizationNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; and we don't need to do word-initial
    // capitalization. All we need is the context for the next chunk (if any).
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSpace(aText[offset]))
        break;
    }
  }
  uint32_t wordStart = offset;
  bool wordHasComplexChar = false;

  RefPtr<nsHyphenator> hyphenator;
  if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
      !(aFlags & BREAK_SUPPRESS_INSIDE) &&
      aHyphenationLanguage) {
    hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
  }

  for (;;) {
    char16_t ch = aText[offset];
    bool isSpace = IsSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    if (aSink && !noBreaksNeeded) {
      // Break before this character if one was requested explicitly, if it
      // is the first non-breakable-space character after a breakable space,
      // or if the break-all word-break mode is in effect.
      breakState[offset] =
        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
        (mWordBreak == LineBreaker::kWordBreak_BreakAll)  ?
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    if (isSpace || ch == '\n') {
      // End of a word that lies entirely inside aText.
      if (offset > wordStart && aSink) {
        if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
          if (wordHasComplexChar) {
            // Save current start-of-word state because GetJISx4051Breaks will
            // set it to false
            uint8_t currentStart = breakState[wordStart];
            nsContentUtils::LineBreaker()->
              GetJISx4051Breaks(aText + wordStart, offset - wordStart,
                                mWordBreak,
                                breakState.Elements() + wordStart);
            breakState[wordStart] = currentStart;
          }
          if (hyphenator) {
            FindHyphenationPoints(hyphenator,
                                  aText + wordStart, aText + offset,
                                  breakState.Elements() + wordStart);
          }
        }
        if (!noCapitalizationNeeded) {
          SetupCapitalization(aText + wordStart, offset - wordStart,
                              capitalizationState.Elements() + wordStart);
        }
      }
      wordHasComplexChar = false;
      ++offset;
      if (offset >= aLength)
        break;
      wordStart = offset;
    } else {
      if (!wordHasComplexChar && IsComplexChar(ch)) {
        wordHasComplexChar = true;
      }
      ++offset;
      if (offset >= aLength) {
        // Save this word
        mCurrentWordContainsComplexChar = wordHasComplexChar;
        uint32_t len = offset - wordStart;
        char16_t* elems = mCurrentWord.AppendElements(len);
        if (!elems)
          return NS_ERROR_OUT_OF_MEMORY;
        memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
        mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
        // Ensure that the break-before for this word is written out
        offset = wordStart + 1;
        UpdateCurrentWordLanguage(aHyphenationLanguage);
        break;
      }
    }
  }

  // Report everything from 'start' up to (not including) 'offset'. The rest
  // of a saved trailing word is reported later by FlushCurrentWord().
  if (aSink) {
    if (!noBreaksNeeded) {
      aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
    }
    if (!noCapitalizationNeeded) {
      aSink->SetCapitalization(start, offset - start,
                               capitalizationState.Elements() + start);
    }
  }
  return NS_OK;
}
310
311
void
312
nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
313
                                     const char16_t *aTextStart,
314
                                     const char16_t *aTextLimit,
315
                                     uint8_t *aBreakState)
316
0
{
317
0
  nsDependentSubstring string(aTextStart, aTextLimit);
318
0
  AutoTArray<bool,200> hyphens;
319
0
  if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
320
0
    for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
321
0
      if (hyphens[i]) {
322
0
        aBreakState[i + 1] =
323
0
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
324
0
      }
325
0
    }
326
0
  }
327
0
}
328
329
/**
 * Feeds a run of 8-bit text to the line breaker.
 *
 * When capitalization or automatic hyphenation is requested, the text is
 * widened to UTF-16 and passed to the char16_t overload. Otherwise the logic
 * mirrors that overload without the hyphenation and capitalization steps: a
 * pending word is continued (and flushed once whitespace is seen), break
 * values for the remaining text are computed and reported to aSink, and a
 * trailing unfinished word is copied into mCurrentWord.
 *
 * @param aHyphenationLanguage only used when deferring to the char16_t
 *                             overload.
 * @param aText   the 8-bit characters to append.
 * @param aLength number of characters in aText. Zero is asserted against and
 *                otherwise ignored (NS_OK is returned, nothing changes).
 * @param aFlags  combination of BREAK_* flags.
 * @param aSink   receives SetBreaks(); may be null, in which case the text
 *                only serves as context.
 * @return NS_OK, NS_ERROR_OUT_OF_MEMORY, or the failure code returned by
 *         FlushCurrentWord() or the char16_t overload.
 */
nsresult
nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
                          uint32_t aFlags, nsILineBreakSink* aSink)
{
  NS_ASSERTION(aLength > 0, "Appending empty text...");
  if (aLength == 0) {
    // Nothing to scan. The loop further down reads aText[offset] and indexes
    // breakState[offset] before it compares offset against aLength, so empty
    // input must not reach it.
    return NS_OK;
  }

  if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
    // Defer to the Unicode path if capitalization or hyphenation is required
    nsAutoString str;
    const char* cp = reinterpret_cast<const char*>(aText);
    CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
    return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
  }

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");

    while (offset < aLength && !IsSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordContainsComplexChar &&
          IsComplexASCIIChar(aText[offset])) {
        mCurrentWordContainsComplexChar = true;
      }
      ++offset;
    }

    if (offset > 0) {
      // Record which part of this sink's text belongs to the pending word.
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    if (offset == aLength) {
      // We did not encounter whitespace so the word hasn't finished yet.
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv))
      return rv;
  }

  // Per-character break values for aText; only allocated if there is a sink
  // to report them to.
  AutoTArray<uint8_t,4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
  }

  // First character whose state is reported to the sink by this call.
  uint32_t start = offset;
  bool noBreaksNeeded = !aSink ||
    ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
     !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; all we need is the context for the next
    // chunk (if any)
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSpace(aText[offset]))
        break;
    }
  }
  uint32_t wordStart = offset;
  bool wordHasComplexChar = false;

  for (;;) {
    uint8_t ch = aText[offset];
    bool isSpace = IsSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    if (aSink) {
      // Consider word-break style.  Since the break position of CJK scripts
      // will be set by nsILineBreaker, we don't consider CJK at this point.
      breakState[offset] =
        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
        (mWordBreak == LineBreaker::kWordBreak_BreakAll) ?
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    if (isSpace) {
      // End of a word that lies entirely inside aText.
      if (offset > wordStart && wordHasComplexChar) {
        if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
          // Save current start-of-word state because GetJISx4051Breaks will
          // set it to false
          uint8_t currentStart = breakState[wordStart];
          nsContentUtils::LineBreaker()->
            GetJISx4051Breaks(aText + wordStart, offset - wordStart,
                              mWordBreak,
                              breakState.Elements() + wordStart);
          breakState[wordStart] = currentStart;
        }
        wordHasComplexChar = false;
      }

      ++offset;
      if (offset >= aLength)
        break;
      wordStart = offset;
    } else {
      if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
        wordHasComplexChar = true;
      }
      ++offset;
      if (offset >= aLength) {
        // Save this word
        mCurrentWordContainsComplexChar = wordHasComplexChar;
        uint32_t len = offset - wordStart;
        char16_t* elems = mCurrentWord.AppendElements(len);
        if (!elems)
          return NS_ERROR_OUT_OF_MEMORY;
        // Widen each 8-bit character into the 16-bit word buffer.
        uint32_t i;
        for (i = wordStart; i < offset; ++i) {
          elems[i - wordStart] = aText[i];
        }
        mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
        // Ensure that the break-before for this word is written out
        offset = wordStart + 1;
        break;
      }
    }
  }

  // noBreaksNeeded is always true when aSink is null, so aSink is non-null
  // whenever this branch runs.
  if (!noBreaksNeeded) {
    aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
  }
  return NS_OK;
}
463
464
void
465
nsLineBreaker::UpdateCurrentWordLanguage(nsAtom *aHyphenationLanguage)
466
0
{
467
0
  if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
468
0
    mCurrentWordContainsMixedLang = true;
469
0
  } else {
470
0
    mCurrentWordLanguage = aHyphenationLanguage;
471
0
  }
472
0
}
473
474
// Records whitespace that has no characters of its own in any sink. The
// pending word is flushed first. The whitespace counts as breakable unless
// BREAK_SUPPRESS_INSIDE is set in aFlags; if it is not breakable but follows
// a breakable space, a break is requested before the next character.
//
// Returns NS_OK, or the failure code from FlushCurrentWord().
nsresult
nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
{
  const nsresult flushResult = FlushCurrentWord();
  if (NS_FAILED(flushResult)) {
    return flushResult;
  }

  const bool breakable = (aFlags & BREAK_SUPPRESS_INSIDE) == 0;
  if (!breakable && mAfterBreakableSpace) {
    mBreakHere = true;
  }
  mAfterBreakableSpace = breakable;
  return NS_OK;
}
488
489
// Flushes the pending word and returns the breaker to its initial
// break-tracking state. On success *aTrailingBreak is set to whether a break
// was pending (an explicit break request or a preceding breakable space).
// If flushing fails, its error code is returned and *aTrailingBreak is not
// written.
nsresult
nsLineBreaker::Reset(bool* aTrailingBreak)
{
  const nsresult flushResult = FlushCurrentWord();
  if (NS_FAILED(flushResult)) {
    return flushResult;
  }

  *aTrailingBreak = mBreakHere || mAfterBreakableSpace;

  mAfterBreakableSpace = false;
  mBreakHere = false;
  return NS_OK;
}