/src/mozilla-central/dom/base/nsLineBreaker.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsLineBreaker.h"
#include "nsContentUtils.h"
#include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
#include "nsHyphenationManager.h"
#include "nsHyphenator.h"
#include "mozilla/gfx/2D.h"
#include "mozilla/intl/LineBreaker.h"

using mozilla::intl::LineBreaker;

// Builds a line breaker in its idle state: no language recorded for the
// current word, all per-word and break-tracking flags cleared, and the
// word-break mode set to LineBreaker::kWordBreak_Normal.
nsLineBreaker::nsLineBreaker()
  : mCurrentWordLanguage(nullptr)
  , mCurrentWordContainsMixedLang(false)
  , mCurrentWordContainsComplexChar(false)
  , mAfterBreakableSpace(false)
  , mBreakHere(false)
  , mWordBreak(LineBreaker::kWordBreak_Normal)
{
}

// Destructor. Only checks (via assertion) that no partially accumulated word
// is still pending, i.e. that the word buffer was flushed beforehand.
nsLineBreaker::~nsLineBreaker()
{
  NS_ASSERTION(mCurrentWord.IsEmpty(),
               "Should have Reset() before destruction!");
}

// Marks, in aCapitalization, the positions in aWord that should be
// capitalized: the first alphanumeric character at the start of the word and
// the first alphanumeric character following each NBSP (the only space
// character a word can contain).
//
// aWord/aLength:    the UTF-16 code units of the word.
// aCapitalization:  output array with at least aLength entries; only entries
//                   that need capitalization are written (set to true), so
//                   the caller is expected to have cleared it beforehand.
//
// For a character encoded as a surrogate pair, the flag is stored at the
// index of the high surrogate.
static void
SetupCapitalization(const char16_t* aWord, uint32_t aLength,
                    bool* aCapitalization)
{
  bool lookingForInitial = true;
  uint32_t pos = 0;
  while (pos < aLength) {
    uint32_t codepoint = aWord[pos];
    if (lookingForInitial) {
      // Combine a well-formed surrogate pair into a single code point so the
      // alphanumeric test sees the real character.
      const bool startsPair = NS_IS_HIGH_SURROGATE(codepoint) &&
                              pos + 1 < aLength &&
                              NS_IS_LOW_SURROGATE(aWord[pos + 1]);
      if (startsPair) {
        codepoint = SURROGATE_TO_UCS4(codepoint, aWord[pos + 1]);
      }
      if (nsContentUtils::IsAlphanumeric(codepoint)) {
        aCapitalization[pos] = true;
        lookingForInitial = false;
      }
      // A non-BMP code point occupied two code units; step over the second.
      if (!IS_IN_BMP(codepoint)) {
        ++pos;
      }
    }
    if (codepoint == 0xA0 /*NBSP*/) {
      lookingForInitial = true;
    }
    ++pos;
  }
}

// Finishes the word accumulated in mCurrentWord: computes the break state for
// every character of the word, optionally adds hyphenation points, and then
// hands each contributing text item's slice of the results (breaks and, when
// requested, capitalization) to that item's sink. Afterwards all per-word
// state is cleared.
//
// Returns NS_OK on success, or NS_ERROR_OUT_OF_MEMORY if a scratch array
// could not be grown.
nsresult
nsLineBreaker::FlushCurrentWord()
{
  const uint32_t wordLength = mCurrentWord.Length();
  AutoTArray<uint8_t,4000> breaks;
  if (!breaks.AppendElements(wordLength)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  if (mCurrentWordContainsComplexChar) {
    // Let the JIS X 4051 breaker decide every position in the word.
    nsContentUtils::LineBreaker()->
      GetJISx4051Breaks(mCurrentWord.Elements(), wordLength, mWordBreak,
                        breaks.Elements());
  } else {
    // No complex characters: every position gets the same state, "break"
    // for break-all and "no break" otherwise.
    const int uniformState =
      (mWordBreak == LineBreaker::kWordBreak_BreakAll)
        ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
        : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    memset(breaks.Elements(), uniformState, wordLength*sizeof(uint8_t));
  }

  // Auto-hyphenation applies only when the word has a single known language
  // and every contributing text item asked for it.
  bool canAutoHyphenate = mCurrentWordLanguage &&
    !mCurrentWordContainsMixedLang;
  if (canAutoHyphenate) {
    for (const TextItem& item : mTextItems) {
      if (!(item.mFlags & BREAK_USE_AUTO_HYPHENATION)) {
        canAutoHyphenate = false;
        break;
      }
    }
  }
  if (canAutoHyphenate) {
    RefPtr<nsHyphenator> hyphenator =
      nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
    if (hyphenator) {
      FindHyphenationPoints(hyphenator,
                            mCurrentWord.Elements(),
                            mCurrentWord.Elements() + wordLength,
                            breaks.Elements());
    }
  }

  // Filled lazily, the first time some item needs capitalization data.
  nsTArray<bool> capitalization;

  uint32_t wordOffset = 0;
  for (uint32_t idx = 0; idx < mTextItems.Length(); ++idx) {
    TextItem& item = mTextItems[idx];
    NS_ASSERTION(item.mLength > 0, "Zero length word contribution?");

    const bool startsAtSinkOrigin = item.mSinkOffset == 0;

    if (startsAtSinkOrigin && (item.mFlags & BREAK_SUPPRESS_INITIAL)) {
      breaks[wordOffset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    if (item.mFlags & BREAK_SUPPRESS_INSIDE) {
      // Leave the item's first position alone when it sits at sink offset 0;
      // clear everything else in the item's slice.
      const uint32_t untouched = startsAtSinkOrigin ? 1 : 0;
      memset(breaks.Elements() + wordOffset + untouched,
             gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
             (item.mLength - untouched)*sizeof(uint8_t));
    }

    // Don't set the break state for the first character of the word, because
    // it was already set correctly earlier and we don't know what the true
    // value should be.
    const uint32_t skipFirst = (idx == 0) ? 1 : 0;
    if (item.mSink) {
      item.mSink->SetBreaks(item.mSinkOffset + skipFirst,
                            item.mLength - skipFirst,
                            breaks.Elements() + wordOffset + skipFirst);

      if (item.mFlags & BREAK_NEED_CAPITALIZATION) {
        if (capitalization.IsEmpty()) {
          if (!capitalization.AppendElements(wordLength)) {
            return NS_ERROR_OUT_OF_MEMORY;
          }
          memset(capitalization.Elements(), false, wordLength*sizeof(bool));
          SetupCapitalization(mCurrentWord.Elements(), wordLength,
                              capitalization.Elements());
        }
        item.mSink->SetCapitalization(item.mSinkOffset, item.mLength,
                                      capitalization.Elements() + wordOffset);
      }
    }

    wordOffset += item.mLength;
  }

  // Reset all per-word state for the next word.
  mCurrentWord.Clear();
  mTextItems.Clear();
  mCurrentWordContainsComplexChar = false;
  mCurrentWordContainsMixedLang = false;
  mCurrentWordLanguage = nullptr;
  return NS_OK;
}

// If the aFlags parameter to AppendText has all these bits set,
// then we don't need to worry about finding break opportunities
// in the appended text. Both AppendText overloads additionally require
// that neither mBreakHere nor mAfterBreakableSpace is set before they
// take that shortcut.
#define NO_BREAKS_NEEDED_FLAGS (BREAK_SUPPRESS_INITIAL | \
                                BREAK_SUPPRESS_INSIDE | \
                                BREAK_SKIP_SETTING_NO_BREAKS)

// Appends a run of UTF-16 text to the line breaker.
//
// If a word is pending from an earlier call, leading non-space characters of
// aText are added to it; the pending word is flushed as soon as a space is
// seen. The remainder of the run is then scanned: a break-before state is
// computed for each character (and, when BREAK_NEED_CAPITALIZATION is set,
// capitalization flags for each completed word) and reported to aSink. A
// trailing word that is not terminated by a space is saved in
// mCurrentWord/mTextItems so that a later call can continue it.
//
// aHyphenationLanguage: language used to look up a hyphenator when
//                       BREAK_USE_AUTO_HYPHENATION is set; may be null.
// aText, aLength:       the code units to append. An empty run is a no-op.
// aFlags:               BREAK_* flags for this run.
// aSink:                receives SetBreaks/SetCapitalization calls; may be
//                       null, in which case only the breaker's internal
//                       state is updated.
//
// Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if a scratch buffer could
// not be grown, or the failure code from FlushCurrentWord().
nsresult
nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
                          uint32_t aFlags, nsILineBreakSink* aSink)
{
  NS_ASSERTION(aLength > 0, "Appending empty text...");
  if (aLength == 0) {
    // Do not rely on the assertion alone: the scanning loop below reads
    // aText[offset] (and may index breakState) before it performs any bounds
    // check. An empty run contributes nothing, so leave all state untouched.
    return NS_OK;
  }

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");

    while (offset < aLength && !IsSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
        mCurrentWordContainsComplexChar = true;
      }
      ++offset;
    }

    if (offset > 0) {
      // The language is the same for every character of this run and the
      // update is idempotent, so a single call covers all characters that
      // were just appended.
      UpdateCurrentWordLanguage(aHyphenationLanguage);
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    if (offset == aLength) {
      // We did not encounter whitespace so the word hasn't finished yet.
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv))
      return rv;
  }

  // Scratch break states, indexed by offset into aText. Only allocated when
  // there is a sink to report to.
  AutoTArray<uint8_t,4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
  }

  // Scratch capitalization flags, indexed like breakState; only allocated
  // (and zeroed) when the sink wants capitalization data.
  bool noCapitalizationNeeded = true;
  nsTArray<bool> capitalizationState;
  if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
    if (!capitalizationState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
    memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
    noCapitalizationNeeded = false;
  }

  // First offset whose results will be reported to the sink.
  uint32_t start = offset;
  bool noBreaksNeeded = !aSink ||
    ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
     !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded && noCapitalizationNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; and we don't need to do word-initial
    // capitalization. All we need is the context for the next chunk (if any).
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSpace(aText[offset]))
        break;
    }
  }
  uint32_t wordStart = offset;
  bool wordHasComplexChar = false;

  // Only look up a hyphenator when hyphenation was requested, breaks inside
  // the run are allowed, and a language is known.
  RefPtr<nsHyphenator> hyphenator;
  if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
      !(aFlags & BREAK_SUPPRESS_INSIDE) &&
      aHyphenationLanguage) {
    hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
  }

  for (;;) {
    char16_t ch = aText[offset];
    bool isSpace = IsSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    if (aSink && !noBreaksNeeded) {
      // A break is allowed before this character if one was explicitly
      // pending, if we are leaving a run of breakable space, or if
      // word-break is break-all.
      breakState[offset] =
        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
        (mWordBreak == LineBreaker::kWordBreak_BreakAll)  ?
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    if (isSpace || ch == '\n') {
      // End of a word (possibly an empty one): finalize [wordStart, offset).
      if (offset > wordStart && aSink) {
        if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
          if (wordHasComplexChar) {
            // Save current start-of-word state because GetJISx4051Breaks will
            // set it to false
            uint8_t currentStart = breakState[wordStart];
            nsContentUtils::LineBreaker()->
              GetJISx4051Breaks(aText + wordStart, offset - wordStart,
                                mWordBreak,
                                breakState.Elements() + wordStart);
            breakState[wordStart] = currentStart;
          }
          if (hyphenator) {
            FindHyphenationPoints(hyphenator,
                                  aText + wordStart, aText + offset,
                                  breakState.Elements() + wordStart);
          }
        }
        if (!noCapitalizationNeeded) {
          SetupCapitalization(aText + wordStart, offset - wordStart,
                              capitalizationState.Elements() + wordStart);
        }
      }
      wordHasComplexChar = false;
      ++offset;
      if (offset >= aLength)
        break;
      wordStart = offset;
    } else {
      if (!wordHasComplexChar && IsComplexChar(ch)) {
        wordHasComplexChar = true;
      }
      ++offset;
      if (offset >= aLength) {
        // Save this word
        mCurrentWordContainsComplexChar = wordHasComplexChar;
        uint32_t len = offset - wordStart;
        char16_t* elems = mCurrentWord.AppendElements(len);
        if (!elems)
          return NS_ERROR_OUT_OF_MEMORY;
        memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
        mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
        // Ensure that the break-before for this word is written out
        offset = wordStart + 1;
        UpdateCurrentWordLanguage(aHyphenationLanguage);
        break;
      }
    }
  }

  // Report everything from `start` up to (but excluding) `offset`; the rest
  // of an unfinished trailing word is reported when that word is flushed.
  if (aSink) {
    if (!noBreaksNeeded) {
      aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
    }
    if (!noCapitalizationNeeded) {
      aSink->SetCapitalization(start, offset - start,
                               capitalizationState.Elements() + start);
    }
  }
  return NS_OK;
}

// Asks aHyphenator for the hyphenation points of the word
// [aTextStart, aTextLimit) and records them in aBreakState.
//
// A hyphenation point reported for character i is stored as
// FLAG_BREAK_TYPE_HYPHEN at aBreakState[i + 1]. No entry is written for the
// last character of the word, and nothing is changed if the hyphenator
// reports failure.
//
// aBreakState must have at least (aTextLimit - aTextStart) entries.
void
nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
                                     const char16_t *aTextStart,
                                     const char16_t *aTextLimit,
                                     uint8_t *aBreakState)
{
  nsDependentSubstring word(aTextStart, aTextLimit);
  AutoTArray<bool,200> hyphenAfter;
  if (NS_FAILED(aHyphenator->Hyphenate(word, hyphenAfter))) {
    return;
  }

  // `next` is the position that would start the new line; the hyphen flag
  // belongs to the character just before it.
  for (uint32_t next = 1; next < word.Length(); ++next) {
    if (hyphenAfter[next - 1]) {
      aBreakState[next] =
        gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
    }
  }
}

// Appends a run of 8-bit text to the line breaker.
//
// When capitalization or automatic hyphenation is requested, the text is
// widened to UTF-16 and handed to the char16_t overload. Otherwise the run is
// processed here: leading non-space characters continue any pending word
// (which is flushed once a space is seen), a break-before state is computed
// for each remaining character and reported to aSink, and a trailing word
// that is not terminated by a space is saved in mCurrentWord/mTextItems so a
// later call can continue it.
//
// aHyphenationLanguage: only used when deferring to the char16_t overload.
// aText, aLength:       the characters to append. An empty run is a no-op.
// aFlags:               BREAK_* flags for this run.
// aSink:                receives SetBreaks calls; may be null, in which case
//                       only the breaker's internal state is updated.
//
// Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if a scratch buffer could
// not be grown, or the failure code from FlushCurrentWord().
nsresult
nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
                          uint32_t aFlags, nsILineBreakSink* aSink)
{
  NS_ASSERTION(aLength > 0, "Appending empty text...");
  if (aLength == 0) {
    // Do not rely on the assertion alone: the scanning loop below reads
    // aText[offset] (and may index breakState) before it performs any bounds
    // check. An empty run contributes nothing, so leave all state untouched.
    return NS_OK;
  }

  if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
    // Defer to the Unicode path if capitalization or hyphenation is required
    nsAutoString str;
    const char* cp = reinterpret_cast<const char*>(aText);
    CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
    return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
  }

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");

    while (offset < aLength && !IsSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordContainsComplexChar &&
          IsComplexASCIIChar(aText[offset])) {
        mCurrentWordContainsComplexChar = true;
      }
      ++offset;
    }

    if (offset > 0) {
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    if (offset == aLength) {
      // We did not encounter whitespace so the word hasn't finished yet.
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv))
      return rv;
  }

  // Scratch break states, indexed by offset into aText. Only allocated when
  // there is a sink to report to.
  AutoTArray<uint8_t,4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength))
      return NS_ERROR_OUT_OF_MEMORY;
  }

  // First offset whose results will be reported to the sink.
  uint32_t start = offset;
  bool noBreaksNeeded = !aSink ||
    ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
     !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; all we need is the context for the next
    // chunk (if any)
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSpace(aText[offset]))
        break;
    }
  }
  uint32_t wordStart = offset;
  bool wordHasComplexChar = false;

  for (;;) {
    uint8_t ch = aText[offset];
    bool isSpace = IsSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    if (aSink) {
      // Consider word-break style.  Since the break position of CJK scripts
      // will be set by nsILineBreaker, we don't consider CJK at this point.
      breakState[offset] =
        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
        (mWordBreak == LineBreaker::kWordBreak_BreakAll) ?
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
          gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    if (isSpace) {
      // End of a word: refine the breaks inside [wordStart, offset) if the
      // word contained a complex character.
      if (offset > wordStart && wordHasComplexChar) {
        if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
          // Save current start-of-word state because GetJISx4051Breaks will
          // set it to false
          uint8_t currentStart = breakState[wordStart];
          nsContentUtils::LineBreaker()->
            GetJISx4051Breaks(aText + wordStart, offset - wordStart,
                              mWordBreak,
                              breakState.Elements() + wordStart);
          breakState[wordStart] = currentStart;
        }
        wordHasComplexChar = false;
      }

      ++offset;
      if (offset >= aLength)
        break;
      wordStart = offset;
    } else {
      if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
        wordHasComplexChar = true;
      }
      ++offset;
      if (offset >= aLength) {
        // Save this word
        mCurrentWordContainsComplexChar = wordHasComplexChar;
        uint32_t len = offset - wordStart;
        char16_t* elems = mCurrentWord.AppendElements(len);
        if (!elems)
          return NS_ERROR_OUT_OF_MEMORY;
        // Widen each 8-bit character into the UTF-16 word buffer.
        uint32_t i;
        for (i = wordStart; i < offset; ++i) {
          elems[i - wordStart] = aText[i];
        }
        mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
        // Ensure that the break-before for this word is written out
        offset = wordStart + 1;
        break;
      }
    }
  }

  // noBreaksNeeded is always true when aSink is null, so aSink is non-null
  // here. Report everything from `start` up to (but excluding) `offset`; the
  // rest of an unfinished trailing word is reported when that word is
  // flushed.
  if (!noBreaksNeeded) {
    aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
  }
  return NS_OK;
}

// Folds the language of newly appended text into the current word's language
// state. If the word already has a language and it differs from
// aHyphenationLanguage, the word is marked as mixed-language; otherwise the
// word's language is set to aHyphenationLanguage.
void
nsLineBreaker::UpdateCurrentWordLanguage(nsAtom *aHyphenationLanguage)
{
  const bool languageConflict =
    mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage;
  if (languageConflict) {
    mCurrentWordContainsMixedLang = true;
    return;
  }
  mCurrentWordLanguage = aHyphenationLanguage;
}

// Records whitespace that produces no characters in any sink. The pending
// word (if any) is flushed first. The whitespace counts as breakable unless
// BREAK_SUPPRESS_INSIDE is set in aFlags; if it is not breakable but follows
// breakable space, a break is recorded as pending (mBreakHere).
//
// Returns the failure code from FlushCurrentWord(), or NS_OK.
nsresult
nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
{
  const nsresult flushResult = FlushCurrentWord();
  if (NS_FAILED(flushResult)) {
    return flushResult;
  }

  const bool breakable = (aFlags & BREAK_SUPPRESS_INSIDE) == 0;
  if (!breakable && mAfterBreakableSpace) {
    mBreakHere = true;
  }
  mAfterBreakableSpace = breakable;
  return NS_OK;
}

// Flushes the pending word and clears the break-tracking flags.
//
// aTrailingBreak: out-parameter; set to true when a break was pending or the
//                 text ended after breakable space. Not written if flushing
//                 fails.
//
// Returns the failure code from FlushCurrentWord(), or NS_OK.
nsresult
nsLineBreaker::Reset(bool* aTrailingBreak)
{
  const nsresult flushResult = FlushCurrentWord();
  if (NS_FAILED(flushResult)) {
    return flushResult;
  }

  // Report the pending state before wiping it.
  *aTrailingBreak = (mBreakHere || mAfterBreakableSpace);
  mAfterBreakableSpace = false;
  mBreakHere = false;
  return NS_OK;
}

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2		/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#include "nsLineBreaker.h"
8		#include "nsContentUtils.h"
9		#include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10		#include "nsHyphenationManager.h"
11		#include "nsHyphenator.h"
12		#include "mozilla/gfx/2D.h"
13		#include "mozilla/intl/LineBreaker.h"
14
15		using mozilla::intl::LineBreaker;
16
17		nsLineBreaker::nsLineBreaker()
18		: mCurrentWordLanguage(nullptr),
19		mCurrentWordContainsMixedLang(false),
20		mCurrentWordContainsComplexChar(false),
21		mAfterBreakableSpace(false), mBreakHere(false),
22		mWordBreak(LineBreaker::kWordBreak_Normal)
23	0	{
24	0	}
25
26		nsLineBreaker::~nsLineBreaker()
27	0	{
28	0	NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
29	0	}
30
31		static void
32		SetupCapitalization(const char16_t* aWord, uint32_t aLength,
33		bool* aCapitalization)
34	0	{
35	0	// Capitalize the first alphanumeric character after a space or start
36	0	// of the word.
37	0	// The only space character a word can contain is NBSP.
38	0	bool capitalizeNextChar = true;
39	0	for (uint32_t i = 0; i < aLength; ++i) {
40	0	uint32_t ch = aWord[i];
41	0	if (capitalizeNextChar) {
42	0	if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
43	0	NS_IS_LOW_SURROGATE(aWord[i + 1])) {
44	0	ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
45	0	}
46	0	if (nsContentUtils::IsAlphanumeric(ch)) {
47	0	aCapitalization[i] = true;
48	0	capitalizeNextChar = false;
49	0	}
50	0	if (!IS_IN_BMP(ch)) {
51	0	++i;
52	0	}
53	0	}
54	0	if (ch == 0xA0 /*NBSP*/) {
55	0	capitalizeNextChar = true;
56	0	}
57	0	}
58	0	}
59
60		nsresult
61		nsLineBreaker::FlushCurrentWord()
62	0	{
63	0	uint32_t length = mCurrentWord.Length();
64	0	AutoTArray<uint8_t,4000> breakState;
65	0	if (!breakState.AppendElements(length))
66	0	return NS_ERROR_OUT_OF_MEMORY;
67	0
68	0	nsTArray<bool> capitalizationState;
69	0
70	0	if (!mCurrentWordContainsComplexChar) {
71	0	// For break-strict set everything internal to "break", otherwise
72	0	// to "no break"!
73	0	memset(breakState.Elements(),
74	0	mWordBreak == LineBreaker::kWordBreak_BreakAll ?
75	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
76	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
77	0	length*sizeof(uint8_t));
78	0	} else {
79	0	nsContentUtils::LineBreaker()->
80	0	GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak,
81	0	breakState.Elements());
82	0	}
83	0
84	0	bool autoHyphenate = mCurrentWordLanguage &&
85	0	!mCurrentWordContainsMixedLang;
86	0	uint32_t i;
87	0	for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
88	0	TextItem* ti = &mTextItems[i];
89	0	if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
90	0	autoHyphenate = false;
91	0	}
92	0	}
93	0	if (autoHyphenate) {
94	0	RefPtr<nsHyphenator> hyphenator =
95	0	nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
96	0	if (hyphenator) {
97	0	FindHyphenationPoints(hyphenator,
98	0	mCurrentWord.Elements(),
99	0	mCurrentWord.Elements() + length,
100	0	breakState.Elements());
101	0	}
102	0	}
103	0
104	0	uint32_t offset = 0;
105	0	for (i = 0; i < mTextItems.Length(); ++i) {
106	0	TextItem* ti = &mTextItems[i];
107	0	NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
108	0
109	0	if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
110	0	breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
111	0	}
112	0	if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
113	0	uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
114	0	memset(breakState.Elements() + offset + exclude,
115	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
116	0	(ti->mLength - exclude)*sizeof(uint8_t));
117	0	}
118	0
119	0	// Don't set the break state for the first character of the word, because
120	0	// it was already set correctly earlier and we don't know what the true
121	0	// value should be.
122	0	uint32_t skipSet = i == 0 ? 1 : 0;
123	0	if (ti->mSink) {
124	0	ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
125	0	breakState.Elements() + offset + skipSet);
126	0
127	0	if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
128	0	if (capitalizationState.Length() == 0) {
129	0	if (!capitalizationState.AppendElements(length))
130	0	return NS_ERROR_OUT_OF_MEMORY;
131	0	memset(capitalizationState.Elements(), false, length*sizeof(bool));
132	0	SetupCapitalization(mCurrentWord.Elements(), length,
133	0	capitalizationState.Elements());
134	0	}
135	0	ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
136	0	capitalizationState.Elements() + offset);
137	0	}
138	0	}
139	0
140	0	offset += ti->mLength;
141	0	}
142	0
143	0	mCurrentWord.Clear();
144	0	mTextItems.Clear();
145	0	mCurrentWordContainsComplexChar = false;
146	0	mCurrentWordContainsMixedLang = false;
147	0	mCurrentWordLanguage = nullptr;
148	0	return NS_OK;
149	0	}
150
151		// If the aFlags parameter to AppendText has all these bits set,
152		// then we don't need to worry about finding break opportunities
153		// in the appended text.
154	0	#define NO_BREAKS_NEEDED_FLAGS (BREAK_SUPPRESS_INITIAL | \
155	0	BREAK_SUPPRESS_INSIDE | \
156	0	BREAK_SKIP_SETTING_NO_BREAKS)
157
158		nsresult
159		nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
160		uint32_t aFlags, nsILineBreakSink* aSink)
161	0	{
162	0	NS_ASSERTION(aLength > 0, "Appending empty text...");
163	0
164	0	uint32_t offset = 0;
165	0
166	0	// Continue the current word
167	0	if (mCurrentWord.Length() > 0) {
168	0	NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
169	0
170	0	while (offset < aLength && !IsSpace(aText[offset])) {
171	0	mCurrentWord.AppendElement(aText[offset]);
172	0	if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
173	0	mCurrentWordContainsComplexChar = true;
174	0	}
175	0	UpdateCurrentWordLanguage(aHyphenationLanguage);
176	0	++offset;
177	0	}
178	0
179	0	if (offset > 0) {
180	0	mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
181	0	}
182	0
183	0	if (offset == aLength)
184	0	return NS_OK;
185	0
186	0	// We encountered whitespace, so we're done with this word
187	0	nsresult rv = FlushCurrentWord();
188	0	if (NS_FAILED(rv))
189	0	return rv;
190	0	}
191	0
192	0	AutoTArray<uint8_t,4000> breakState;
193	0	if (aSink) {
194	0	if (!breakState.AppendElements(aLength))
195	0	return NS_ERROR_OUT_OF_MEMORY;
196	0	}
197	0
198	0	bool noCapitalizationNeeded = true;
199	0	nsTArray<bool> capitalizationState;
200	0	if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
201	0	if (!capitalizationState.AppendElements(aLength))
202	0	return NS_ERROR_OUT_OF_MEMORY;
203	0	memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
204	0	noCapitalizationNeeded = false;
205	0	}
206	0
207	0	uint32_t start = offset;
208	0	bool noBreaksNeeded = !aSink ||
209	0	((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
210	0	!mBreakHere && !mAfterBreakableSpace);
211	0	if (noBreaksNeeded && noCapitalizationNeeded) {
212	0	// Skip to the space before the last word, since either the break data
213	0	// here is not needed, or no breaks are set in the sink and there cannot
214	0	// be any breaks in this chunk; and we don't need to do word-initial
215	0	// capitalization. All we need is the context for the next chunk (if any).
216	0	offset = aLength;
217	0	while (offset > start) {
218	0	--offset;
219	0	if (IsSpace(aText[offset]))
220	0	break;
221	0	}
222	0	}
223	0	uint32_t wordStart = offset;
224	0	bool wordHasComplexChar = false;
225	0
226	0	RefPtr<nsHyphenator> hyphenator;
227	0	if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
228	0	!(aFlags & BREAK_SUPPRESS_INSIDE) &&
229	0	aHyphenationLanguage) {
230	0	hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
231	0	}
232	0
233	0	for (;;) {
234	0	char16_t ch = aText[offset];
235	0	bool isSpace = IsSpace(ch);
236	0	bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
237	0
238	0	if (aSink && !noBreaksNeeded) {
239	0	breakState[offset] =
240	0	mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
241	0	(mWordBreak == LineBreaker::kWordBreak_BreakAll) ?
242	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
243	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
244	0	}
245	0	mBreakHere = false;
246	0	mAfterBreakableSpace = isBreakableSpace;
247	0
248	0	if (isSpace || ch == '\n') {
249	0	if (offset > wordStart && aSink) {
250	0	if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
251	0	if (wordHasComplexChar) {
252	0	// Save current start-of-word state because GetJISx4051Breaks will
253	0	// set it to false
254	0	uint8_t currentStart = breakState[wordStart];
255	0	nsContentUtils::LineBreaker()->
256	0	GetJISx4051Breaks(aText + wordStart, offset - wordStart,
257	0	mWordBreak,
258	0	breakState.Elements() + wordStart);
259	0	breakState[wordStart] = currentStart;
260	0	}
261	0	if (hyphenator) {
262	0	FindHyphenationPoints(hyphenator,
263	0	aText + wordStart, aText + offset,
264	0	breakState.Elements() + wordStart);
265	0	}
266	0	}
267	0	if (!noCapitalizationNeeded) {
268	0	SetupCapitalization(aText + wordStart, offset - wordStart,
269	0	capitalizationState.Elements() + wordStart);
270	0	}
271	0	}
272	0	wordHasComplexChar = false;
273	0	++offset;
274	0	if (offset >= aLength)
275	0	break;
276	0	wordStart = offset;
277	0	} else {
278	0	if (!wordHasComplexChar && IsComplexChar(ch)) {
279	0	wordHasComplexChar = true;
280	0	}
281	0	++offset;
282	0	if (offset >= aLength) {
283	0	// Save this word
284	0	mCurrentWordContainsComplexChar = wordHasComplexChar;
285	0	uint32_t len = offset - wordStart;
286	0	char16_t* elems = mCurrentWord.AppendElements(len);
287	0	if (!elems)
288	0	return NS_ERROR_OUT_OF_MEMORY;
289	0	memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
290	0	mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
291	0	// Ensure that the break-before for this word is written out
292	0	offset = wordStart + 1;
293	0	UpdateCurrentWordLanguage(aHyphenationLanguage);
294	0	break;
295	0	}
296	0	}
297	0	}
298	0
299	0	if (aSink) {
300	0	if (!noBreaksNeeded) {
301	0	aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
302	0	}
303	0	if (!noCapitalizationNeeded) {
304	0	aSink->SetCapitalization(start, offset - start,
305	0	capitalizationState.Elements() + start);
306	0	}
307	0	}
308	0	return NS_OK;
309	0	}
310
311		void
312		nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
313		const char16_t *aTextStart,
314		const char16_t *aTextLimit,
315		uint8_t *aBreakState)
316	0	{
317	0	nsDependentSubstring string(aTextStart, aTextLimit);
318	0	AutoTArray<bool,200> hyphens;
319	0	if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
320	0	for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
321	0	if (hyphens[i]) {
322	0	aBreakState[i + 1] =
323	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
324	0	}
325	0	}
326	0	}
327	0	}
328
329		nsresult
330		nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
331		uint32_t aFlags, nsILineBreakSink* aSink)
332	0	{
333	0	NS_ASSERTION(aLength > 0, "Appending empty text...");
334	0
335	0	if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
336	0	// Defer to the Unicode path if capitalization or hyphenation is required
337	0	nsAutoString str;
338	0	const char* cp = reinterpret_cast<const char*>(aText);
339	0	CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
340	0	return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
341	0	}
342	0
343	0	uint32_t offset = 0;
344	0
345	0	// Continue the current word
346	0	if (mCurrentWord.Length() > 0) {
347	0	NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
348	0
349	0	while (offset < aLength && !IsSpace(aText[offset])) {
350	0	mCurrentWord.AppendElement(aText[offset]);
351	0	if (!mCurrentWordContainsComplexChar &&
352	0	IsComplexASCIIChar(aText[offset])) {
353	0	mCurrentWordContainsComplexChar = true;
354	0	}
355	0	++offset;
356	0	}
357	0
358	0	if (offset > 0) {
359	0	mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
360	0	}
361	0
362	0	if (offset == aLength) {
363	0	// We did not encounter whitespace so the word hasn't finished yet.
364	0	return NS_OK;
365	0	}
366	0
367	0	// We encountered whitespace, so we're done with this word
368	0	nsresult rv = FlushCurrentWord();
369	0	if (NS_FAILED(rv))
370	0	return rv;
371	0	}
372	0
373	0	AutoTArray<uint8_t,4000> breakState;
374	0	if (aSink) {
375	0	if (!breakState.AppendElements(aLength))
376	0	return NS_ERROR_OUT_OF_MEMORY;
377	0	}
378	0
379	0	uint32_t start = offset;
380	0	bool noBreaksNeeded = !aSink ||
381	0	((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
382	0	!mBreakHere && !mAfterBreakableSpace);
383	0	if (noBreaksNeeded) {
384	0	// Skip to the space before the last word, since either the break data
385	0	// here is not needed, or no breaks are set in the sink and there cannot
386	0	// be any breaks in this chunk; all we need is the context for the next
387	0	// chunk (if any)
388	0	offset = aLength;
389	0	while (offset > start) {
390	0	--offset;
391	0	if (IsSpace(aText[offset]))
392	0	break;
393	0	}
394	0	}
395	0	uint32_t wordStart = offset;
396	0	bool wordHasComplexChar = false;
397	0
398	0	for (;;) {
399	0	uint8_t ch = aText[offset];
400	0	bool isSpace = IsSpace(ch);
401	0	bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
402	0
403	0	if (aSink) {
404	0	// Consider word-break style. Since the break position of CJK scripts
405	0	// will be set by nsILineBreaker, we don't consider CJK at this point.
406	0	breakState[offset] =
407	0	mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
408	0	(mWordBreak == LineBreaker::kWordBreak_BreakAll) ?
409	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
410	0	gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
411	0	}
412	0	mBreakHere = false;
413	0	mAfterBreakableSpace = isBreakableSpace;
414	0
415	0	if (isSpace) {
416	0	if (offset > wordStart && wordHasComplexChar) {
417	0	if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
418	0	// Save current start-of-word state because GetJISx4051Breaks will
419	0	// set it to false
420	0	uint8_t currentStart = breakState[wordStart];
421	0	nsContentUtils::LineBreaker()->
422	0	GetJISx4051Breaks(aText + wordStart, offset - wordStart,
423	0	mWordBreak,
424	0	breakState.Elements() + wordStart);
425	0	breakState[wordStart] = currentStart;
426	0	}
427	0	wordHasComplexChar = false;
428	0	}
429	0
430	0	++offset;
431	0	if (offset >= aLength)
432	0	break;
433	0	wordStart = offset;
434	0	} else {
435	0	if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
436	0	wordHasComplexChar = true;
437	0	}
438	0	++offset;
439	0	if (offset >= aLength) {
440	0	// Save this word
441	0	mCurrentWordContainsComplexChar = wordHasComplexChar;
442	0	uint32_t len = offset - wordStart;
443	0	char16_t* elems = mCurrentWord.AppendElements(len);
444	0	if (!elems)
445	0	return NS_ERROR_OUT_OF_MEMORY;
446	0	uint32_t i;
447	0	for (i = wordStart; i < offset; ++i) {
448	0	elems[i - wordStart] = aText[i];
449	0	}
450	0	mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
451	0	// Ensure that the break-before for this word is written out
452	0	offset = wordStart + 1;
453	0	break;
454	0	}
455	0	}
456	0	}
457	0
458	0	if (!noBreaksNeeded) {
459	0	aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
460	0	}
461	0	return NS_OK;
462	0	}
463
464		void
465		nsLineBreaker::UpdateCurrentWordLanguage(nsAtom *aHyphenationLanguage)
466	0	{
467	0	if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
468	0	mCurrentWordContainsMixedLang = true;
469	0	} else {
470	0	mCurrentWordLanguage = aHyphenationLanguage;
471	0	}
472	0	}
473
474		nsresult
475		nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
476	0	{
477	0	nsresult rv = FlushCurrentWord();
478	0	if (NS_FAILED(rv))
479	0	return rv;
480	0
481	0	bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
482	0	if (mAfterBreakableSpace && !isBreakableSpace) {
483	0	mBreakHere = true;
484	0	}
485	0	mAfterBreakableSpace = isBreakableSpace;
486	0	return NS_OK;
487	0	}
488
489		nsresult
490		nsLineBreaker::Reset(bool* aTrailingBreak)
491	0	{
492	0	nsresult rv = FlushCurrentWord();
493	0	if (NS_FAILED(rv))
494	0	return rv;
495	0
496	0	*aTrailingBreak = mBreakHere || mAfterBreakableSpace;
497	0	mBreakHere = false;
498	0	mAfterBreakableSpace = false;
499	0	return NS_OK;
500	0	}