/src/mozilla-central/layout/generic/nsTextRunTransformations.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "nsTextRunTransformations.h" |
8 | | |
9 | | #include "mozilla/ComputedStyleInlines.h" |
10 | | #include "mozilla/MemoryReporting.h" |
11 | | #include "mozilla/Move.h" |
12 | | |
13 | | #include "nsGkAtoms.h" |
14 | | #include "nsStyleConsts.h" |
15 | | #include "nsUnicharUtils.h" |
16 | | #include "nsUnicodeProperties.h" |
17 | | #include "nsSpecialCasingData.h" |
18 | | #include "mozilla/gfx/2D.h" |
19 | | #include "nsTextFrameUtils.h" |
20 | | #include "nsIPersistentProperties2.h" |
21 | | #include "GreekCasing.h" |
22 | | #include "IrishCasing.h" |
23 | | |
24 | | using namespace mozilla; |
25 | | using namespace mozilla::gfx; |
26 | | |
// The macros below are UTF-16 code unit values (all within the BMP) that the
// case-transform code in this file compares against or substitutes directly.
27 | | // Unicode characters needing special casing treatment in tr/az languages |
28 | 0 | #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130 |
29 | 0 | #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131 |
30 | | |
31 | | // Greek sigma needs custom handling for the lowercase transform; for details |
32 | | // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within |
33 | | // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120. |
// NOTE(review): in this file the NS_STYLE_TEXT_TRANSFORM_LOWERCASE case lives
// in nsCaseTransformTextRunFactory::TransformString(), which RebuildTextRun()
// calls.
34 | 0 | #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 |
35 | 0 | #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 |
36 | 0 | #define GREEK_SMALL_LETTER_SIGMA 0x03C3 |
37 | | |
38 | | already_AddRefed<nsTransformedTextRun> |
39 | | nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams, |
40 | | nsTransformingTextRunFactory* aFactory, |
41 | | gfxFontGroup* aFontGroup, |
42 | | const char16_t* aString, uint32_t aLength, |
43 | | const gfx::ShapedTextFlags aFlags, |
44 | | const nsTextFrameUtils::Flags aFlags2, |
45 | | nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, |
46 | | bool aOwnsFactory) |
47 | 0 | { |
48 | 0 | NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT), |
49 | 0 | "didn't expect text to be marked as 8-bit here"); |
50 | 0 |
|
51 | 0 | void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); |
52 | 0 | if (!storage) { |
53 | 0 | return nullptr; |
54 | 0 | } |
55 | 0 | |
56 | 0 | RefPtr<nsTransformedTextRun> result = |
57 | 0 | new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup, |
58 | 0 | aString, aLength, aFlags, aFlags2, |
59 | 0 | std::move(aStyles), aOwnsFactory); |
60 | 0 | return result.forget(); |
61 | 0 | } |
62 | | |
63 | | void |
64 | | nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength, |
65 | | bool* aCapitalization) |
66 | 0 | { |
67 | 0 | if (mCapitalize.IsEmpty()) { |
68 | 0 | if (!mCapitalize.AppendElements(GetLength())) |
69 | 0 | return; |
70 | 0 | memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool)); |
71 | 0 | } |
72 | 0 | memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool)); |
73 | 0 | mNeedsRebuild = true; |
74 | 0 | } |
75 | | |
76 | | bool |
77 | | nsTransformedTextRun::SetPotentialLineBreaks(Range aRange, |
78 | | const uint8_t* aBreakBefore) |
79 | 0 | { |
80 | 0 | bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore); |
81 | 0 | if (changed) { |
82 | 0 | mNeedsRebuild = true; |
83 | 0 | } |
84 | 0 | return changed; |
85 | 0 | } |
86 | | |
87 | | size_t |
88 | | nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) |
89 | 0 | { |
90 | 0 | size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); |
91 | 0 | total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf); |
92 | 0 | total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf); |
93 | 0 | if (mOwnsFactory) { |
94 | 0 | total += aMallocSizeOf(mFactory); |
95 | 0 | } |
96 | 0 | return total; |
97 | 0 | } |
98 | | |
99 | | size_t |
100 | | nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) |
101 | 0 | { |
102 | 0 | return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); |
103 | 0 | } |
104 | | |
105 | | already_AddRefed<nsTransformedTextRun> |
106 | | nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength, |
107 | | const gfxTextRunFactory::Parameters* aParams, |
108 | | gfxFontGroup* aFontGroup, |
109 | | gfx::ShapedTextFlags aFlags, |
110 | | nsTextFrameUtils::Flags aFlags2, |
111 | | nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, |
112 | | bool aOwnsFactory) |
113 | 0 | { |
114 | 0 | return nsTransformedTextRun::Create(aParams, this, aFontGroup, |
115 | 0 | aString, aLength, aFlags, aFlags2, std::move(aStyles), |
116 | 0 | aOwnsFactory); |
117 | 0 | } |
118 | | |
119 | | already_AddRefed<nsTransformedTextRun> |
120 | | nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength, |
121 | | const gfxTextRunFactory::Parameters* aParams, |
122 | | gfxFontGroup* aFontGroup, |
123 | | gfx::ShapedTextFlags aFlags, |
124 | | nsTextFrameUtils::Flags aFlags2, |
125 | | nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, |
126 | | bool aOwnsFactory) |
127 | 0 | { |
128 | 0 | // We'll only have a Unicode code path to minimize the amount of code needed |
129 | 0 | // for these rarely used features |
130 | 0 | NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength); |
131 | 0 | return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, |
132 | 0 | aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, |
133 | 0 | aFlags2, |
134 | 0 | std::move(aStyles), aOwnsFactory); |
135 | 0 | } |
136 | | |
// Rebuilds the glyph data of aDest from the glyph runs of aSrc, collapsing
// groups of source characters into single destination characters.
//
// aCharsToMerge is indexed by character position in aSrc: a true entry means
//   that character's glyphs are appended to those of the preceding character
//   instead of starting a new destination character.
// aDeletedChars is indexed by character position in aDest: after each merged
//   character is written, every following destination position flagged true
//   is filled with an empty continuation glyph.
//
// The function returns early, leaving aDest partially built, if adding a
// glyph run to aDest fails.
// NOTE(review): the comment further down says "See comment at start of this
// function", but no such comment is present in this listing; it presumably
// lives with the declaration - TODO confirm.
137 | | void |
138 | | MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, |
139 | | const bool* aCharsToMerge, const bool* aDeletedChars) |
140 | 0 | { |
141 | 0 | aDest->ResetGlyphRuns(); |
142 | 0 |
|
143 | 0 | gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc)); |
// 'offset' is the next character position to be written in aDest.
144 | 0 | uint32_t offset = 0; |
// Detailed glyphs accumulated for the merge group currently being built.
145 | 0 | AutoTArray<gfxTextRun::DetailedGlyph,2> glyphs; |
146 | 0 | const gfxTextRun::CompressedGlyph continuationGlyph = |
147 | 0 | gfxTextRun::CompressedGlyph::MakeComplex(false, false, 0); |
148 | 0 | while (iter.NextRun()) { |
149 | 0 | const gfxTextRun::GlyphRun* run = iter.GetGlyphRun(); |
// Start a glyph run in aDest at the current output offset, carrying over the
// source run's font, match type and orientation.
150 | 0 | nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType, |
151 | 0 | offset, false, run->mOrientation); |
152 | 0 | if (NS_FAILED(rv)) |
153 | 0 | return; |
154 | 0 | |
155 | 0 | bool anyMissing = false; |
156 | 0 | uint32_t mergeRunStart = iter.GetStringStart(); |
157 | 0 | const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs(); |
// mergedGlyph starts as a copy of the first source character of the merge
// group; its glyph count / missing state is rewritten before it is stored.
158 | 0 | gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; |
159 | 0 | uint32_t stringEnd = iter.GetStringEnd(); |
160 | 0 | for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) { |
161 | 0 | const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; |
162 | 0 | if (g.IsSimpleGlyph()) { |
// Simple glyphs are expanded to DetailedGlyph form so they can be collected
// alongside complex ones; they are skipped once the group contains a missing
// glyph.
163 | 0 | if (!anyMissing) { |
164 | 0 | gfxTextRun::DetailedGlyph details; |
165 | 0 | details.mGlyphID = g.GetSimpleGlyph(); |
166 | 0 | details.mAdvance = g.GetSimpleAdvance(); |
167 | 0 | glyphs.AppendElement(details); |
168 | 0 | } |
169 | 0 | } else { |
// A missing glyph discards whatever was collected so far for this group.
170 | 0 | if (g.IsMissing()) { |
171 | 0 | anyMissing = true; |
172 | 0 | glyphs.Clear(); |
173 | 0 | } |
174 | 0 | if (g.GetGlyphCount() > 0) { |
175 | 0 | glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); |
176 | 0 | } |
177 | 0 | } |
178 | 0 |
|
179 | 0 | if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) { |
180 | 0 | // next char is supposed to merge with current, so loop without |
181 | 0 | // writing current merged glyph to the destination |
182 | 0 | continue; |
183 | 0 | } |
184 | 0 | |
185 | 0 | // If the start of the merge run is actually a character that should |
186 | 0 | // have been merged with the previous character (this can happen |
187 | 0 | // if there's a font change in the middle of a case-mapped character, |
188 | 0 | // that decomposed into a sequence of base+diacritics, for example), |
189 | 0 | // just discard the entire merge run. See comment at start of this |
190 | 0 | // function. |
191 | 0 | NS_WARNING_ASSERTION( |
192 | 0 | !aCharsToMerge[mergeRunStart], |
193 | 0 | "unable to merge across a glyph run boundary, glyph(s) discarded"); |
194 | 0 | if (!aCharsToMerge[mergeRunStart]) { |
195 | 0 | if (anyMissing) { |
196 | 0 | mergedGlyph.SetMissing(glyphs.Length()); |
197 | 0 | } else { |
198 | 0 | mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), |
199 | 0 | mergedGlyph.IsLigatureGroupStart(), |
200 | 0 | glyphs.Length()); |
201 | 0 | } |
202 | 0 | aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements()); |
203 | 0 | ++offset; |
204 | 0 |
|
// Destination characters flagged as deleted receive an empty continuation
// glyph so that aDest keeps one entry per destination character.
205 | 0 | while (offset < aDest->GetLength() && aDeletedChars[offset]) { |
206 | 0 | aDest->SetGlyphs(offset++, continuationGlyph, nullptr); |
207 | 0 | } |
208 | 0 | } |
209 | 0 |
|
// Reset the accumulator and begin a new merge group at the next source
// character (if there is one left in this glyph run).
210 | 0 | glyphs.Clear(); |
211 | 0 | anyMissing = false; |
212 | 0 | mergeRunStart = k + 1; |
213 | 0 | if (mergeRunStart < stringEnd) { |
214 | 0 | mergedGlyph = srcGlyphs[mergeRunStart]; |
215 | 0 | } |
216 | 0 | } |
217 | 0 | NS_ASSERTION(glyphs.Length() == 0, |
218 | 0 | "Leftover glyphs, don't request merging of the last character with its next!"); |
219 | 0 | } |
220 | 0 | NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); |
221 | 0 | } |
222 | | |
223 | | gfxTextRunFactory::Parameters |
224 | | GetParametersForInner(nsTransformedTextRun* aTextRun, |
225 | | gfx::ShapedTextFlags* aFlags, |
226 | | DrawTarget* aRefDrawTarget) |
227 | 0 | { |
228 | 0 | gfxTextRunFactory::Parameters params = |
229 | 0 | { aRefDrawTarget, nullptr, nullptr, |
230 | 0 | nullptr, 0, aTextRun->GetAppUnitsPerDevUnit() |
231 | 0 | }; |
232 | 0 | *aFlags = aTextRun->GetFlags(); |
233 | 0 | return params; |
234 | 0 | } |
235 | | |
// Casing conventions that depart from the default Unicode mappings for
// particular languages.
// Each enumerator is named after a well-known exemplar language showing the
// behavior; several language tags may resolve to the same enumerator when
// they share that behavior.
enum LanguageSpecificCasingBehavior {
  // Default behavior, nothing language-specific.
  eLSCB_None,
  // The "ij" digraph is capitalized as a single unit.
  eLSCB_Dutch,
  // Accents are stripped when uppercasing Greek vowels.
  eLSCB_Greek,
  // Prefix letters stay lowercase when uppercasing Irish.
  eLSCB_Irish,
  // The dotted/dotless-i distinction is preserved in uppercase.
  eLSCB_Turkish
};
248 | | |
249 | | static LanguageSpecificCasingBehavior |
250 | | GetCasingFor(const nsAtom* aLang) |
251 | 0 | { |
252 | 0 | if (!aLang) { |
253 | 0 | return eLSCB_None; |
254 | 0 | } |
255 | 0 | if (aLang == nsGkAtoms::tr || |
256 | 0 | aLang == nsGkAtoms::az || |
257 | 0 | aLang == nsGkAtoms::ba || |
258 | 0 | aLang == nsGkAtoms::crh || |
259 | 0 | aLang == nsGkAtoms::tt) { |
260 | 0 | return eLSCB_Turkish; |
261 | 0 | } |
262 | 0 | if (aLang == nsGkAtoms::nl) { |
263 | 0 | return eLSCB_Dutch; |
264 | 0 | } |
265 | 0 | if (aLang == nsGkAtoms::el) { |
266 | 0 | return eLSCB_Greek; |
267 | 0 | } |
268 | 0 | if (aLang == nsGkAtoms::ga) { |
269 | 0 | return eLSCB_Irish; |
270 | 0 | } |
271 | 0 | |
272 | 0 | // Is there a region subtag we should ignore? |
273 | 0 | nsAtomString langStr(const_cast<nsAtom*>(aLang)); |
274 | 0 | int index = langStr.FindChar('-'); |
275 | 0 | if (index > 0) { |
276 | 0 | langStr.Truncate(index); |
277 | 0 | RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr); |
278 | 0 | return GetCasingFor(truncatedLang); |
279 | 0 | } |
280 | 0 | |
281 | 0 | return eLSCB_None; |
282 | 0 | } |
283 | | |
// Applies a text-transform (lowercase, uppercase, capitalize, full-width) to
// aString, appending the result to aConvertedString.
//
// aString             input UTF-16 text.
// aConvertedString    output; transformed code units are appended to it.
// aAllUppercase       when true, every character is uppercased regardless of
//                     any per-character style.
// aLanguage           initial language used to pick language-specific casing;
//                     when aTextRun is given it is replaced per character by
//                     the style's language (or null if not explicit).
// aCharsToMergeArray  output; receives one entry per code unit appended to
//                     aConvertedString, true for units that are extra output
//                     for the same input character.
// aDeletedCharsArray  output; receives one entry per input code unit, true
//                     for characters dropped from the output and for the
//                     trailing half of a surrogate pair.
// aTextRun            optional source of per-character styles, capitalization
//                     flags and break opportunities. The capitalize transform
//                     only acts when this is non-null.
// aOffsetInTextRun    index into aTextRun's per-character data that
//                     corresponds to aString[0].
// aCanBreakBeforeArray, aStyleArray
//                     optional outputs, filled only when both are non-null
//                     (aTextRun is then required); one entry per output unit.
//
// Returns true if any merging or deletion was recorded, i.e. the output no
// longer maps one-to-one onto the input.
284 | | bool |
285 | | nsCaseTransformTextRunFactory::TransformString( |
286 | | const nsAString& aString, |
287 | | nsString& aConvertedString, |
288 | | bool aAllUppercase, |
289 | | const nsAtom* aLanguage, |
290 | | nsTArray<bool>& aCharsToMergeArray, |
291 | | nsTArray<bool>& aDeletedCharsArray, |
292 | | const nsTransformedTextRun* aTextRun, |
293 | | uint32_t aOffsetInTextRun, |
294 | | nsTArray<uint8_t>* aCanBreakBeforeArray, |
295 | | nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) |
296 | 0 | { |
297 | 0 | bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray; |
298 | 0 | MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun, |
299 | 0 | "text run must be provided to use aux output arrays"); |
300 | 0 |
|
301 | 0 | uint32_t length = aString.Length(); |
302 | 0 | const char16_t* str = aString.BeginReading(); |
303 | 0 |
|
304 | 0 | bool mergeNeeded = false; |
305 | 0 |
|
306 | 0 | bool capitalizeDutchIJ = false; |
307 | 0 | bool prevIsLetter = false; |
308 | 0 | bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' |
309 | 0 | // when doing Irish lowercasing |
// Index in aConvertedString of a provisional FINAL SIGMA, or uint32_t(-1) if
// there is none pending.
310 | 0 | uint32_t sigmaIndex = uint32_t(-1); |
311 | 0 | nsUGenCategory cat; |
312 | 0 |
|
313 | 0 | uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0; |
314 | 0 | bool forceNonFullWidth = false; |
315 | 0 | const nsAtom* lang = aLanguage; |
316 | 0 |
|
317 | 0 | LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); |
318 | 0 | mozilla::GreekCasing::State greekState; |
319 | 0 | mozilla::IrishCasing::State irishState; |
320 | 0 | uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) |
321 | 0 | // in the output string |
322 | 0 | uint32_t irishMarkSrc = uint32_t(-1); // corresponding location in source |
323 | 0 | // string (may differ from output due to |
324 | 0 | // expansions like eszet -> 'SS') |
325 | 0 | uint32_t greekMark = uint32_t(-1); // location of uppercase ETA that may need |
326 | 0 | // tonos added (if it is disjunctive eta) |
327 | 0 | const char16_t kGreekUpperEta = 0x0397; |
328 | 0 |
|
// Walk the input one UTF-16 code unit at a time. aOffsetInTextRun advances in
// lockstep with i so that it keeps indexing aTextRun's per-character data.
329 | 0 | for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) { |
330 | 0 | uint32_t ch = str[i]; |
331 | 0 |
|
332 | 0 | RefPtr<nsTransformedCharStyle> charStyle; |
// With a text run, the transform and language come from the per-character
// style. A change of language discards all language-specific casing state.
333 | 0 | if (aTextRun) { |
334 | 0 | charStyle = aTextRun->mStyles[aOffsetInTextRun]; |
335 | 0 | style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : |
336 | 0 | charStyle->mTextTransform; |
337 | 0 | forceNonFullWidth = charStyle->mForceNonFullWidth; |
338 | 0 |
|
339 | 0 | nsAtom* newLang = charStyle->mExplicitLanguage |
340 | 0 | ? charStyle->mLanguage.get() : nullptr; |
341 | 0 | if (lang != newLang) { |
342 | 0 | lang = newLang; |
343 | 0 | languageSpecificCasing = GetCasingFor(lang); |
344 | 0 | greekState.Reset(); |
345 | 0 | irishState.Reset(); |
346 | 0 | irishMark = uint32_t(-1); |
347 | 0 | irishMarkSrc = uint32_t(-1); |
348 | 0 | greekMark = uint32_t(-1); |
349 | 0 | } |
350 | 0 | } |
351 | 0 |
|
// Number of additional code units emitted for this input character beyond the
// first; each one gets a "merge with previous" entry at the end of the loop.
352 | 0 | int extraChars = 0; |
353 | 0 | const mozilla::unicode::MultiCharMapping *mcm; |
354 | 0 | bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? |
355 | 0 |
|
// Combine a valid surrogate pair into one code point. The low surrogate is
// skipped further down (i++) when a non-BMP result is appended.
356 | 0 | if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && |
357 | 0 | NS_IS_LOW_SURROGATE(str[i + 1])) { |
358 | 0 | ch = SURROGATE_TO_UCS4(ch, str[i + 1]); |
359 | 0 | } |
360 | 0 |
|
361 | 0 | switch (style) { |
362 | 0 | case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: |
// NOTE(review): the two Turkish early exits below clear sigmaIndex without
// first applying the FINAL SIGMA -> SMALL SIGMA fix-up performed later in
// this case, so a provisional final sigma directly followed by 'I' or U+0130
// stays a final sigma. Confirm whether that is intended.
363 | 0 | if (languageSpecificCasing == eLSCB_Turkish) { |
364 | 0 | if (ch == 'I') { |
365 | 0 | ch = LATIN_SMALL_LETTER_DOTLESS_I; |
366 | 0 | prevIsLetter = true; |
367 | 0 | sigmaIndex = uint32_t(-1); |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { |
371 | 0 | ch = 'i'; |
372 | 0 | prevIsLetter = true; |
373 | 0 | sigmaIndex = uint32_t(-1); |
374 | 0 | break; |
375 | 0 | } |
376 | 0 | } |
377 | 0 | |
378 | 0 | cat = mozilla::unicode::GetGenCategory(ch); |
379 | 0 |
|
380 | 0 | if (languageSpecificCasing == eLSCB_Irish && |
381 | 0 | cat == nsUGenCategory::kLetter) { |
382 | 0 | // See bug 1018805 for Irish lowercasing requirements |
383 | 0 | if (!prevIsLetter && (ch == 'n' || ch == 't')) { |
384 | 0 | ntPrefix = true; |
385 | 0 | } else { |
// A word-initial n/t prefix followed by an uppercase vowel gets a hyphen
// inserted between them; the hyphen counts as an extra output unit.
386 | 0 | if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { |
387 | 0 | aConvertedString.Append('-'); |
388 | 0 | ++extraChars; |
389 | 0 | } |
390 | 0 | ntPrefix = false; |
391 | 0 | } |
392 | 0 | } else { |
393 | 0 | ntPrefix = false; |
394 | 0 | } |
395 | 0 |
|
396 | 0 | // Special lowercasing behavior for Greek Sigma: note that this is listed |
397 | 0 | // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a |
398 | 0 | // language-specific mapping; it applies regardless of the language of |
399 | 0 | // the element. |
400 | 0 | // |
401 | 0 | // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. |
402 | 0 | // the non-final form) whenever there is a following letter, or when the |
403 | 0 | // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a |
404 | 0 | // LETTER); and to FINAL SIGMA when it is preceded by another letter but |
405 | 0 | // not followed by one. |
406 | 0 | // |
407 | 0 | // To implement the context-sensitive nature of this mapping, we keep |
408 | 0 | // track of whether the previous character was a letter. If not, CAPITAL |
409 | 0 | // SIGMA will map directly to SMALL SIGMA. If the previous character |
410 | 0 | // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the |
411 | 0 | // position in the converted string; if we then encounter another letter, |
412 | 0 | // that FINAL SIGMA is replaced with a standard SMALL SIGMA. |
413 | 0 |
|
414 | 0 | // If sigmaIndex is not -1, it marks where we have provisionally mapped |
415 | 0 | // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we |
416 | 0 | // need to change it to SMALL SIGMA. |
417 | 0 | if (sigmaIndex != uint32_t(-1)) { |
418 | 0 | if (cat == nsUGenCategory::kLetter) { |
419 | 0 | aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); |
420 | 0 | } |
421 | 0 | } |
422 | 0 |
|
423 | 0 | if (ch == GREEK_CAPITAL_LETTER_SIGMA) { |
424 | 0 | // If preceding char was a letter, map to FINAL instead of SMALL, |
425 | 0 | // and note where it occurred by setting sigmaIndex; we'll change it |
426 | 0 | // to standard SMALL SIGMA later if another letter follows |
427 | 0 | if (prevIsLetter) { |
428 | 0 | ch = GREEK_SMALL_LETTER_FINAL_SIGMA; |
429 | 0 | sigmaIndex = aConvertedString.Length(); |
430 | 0 | } else { |
431 | 0 | // CAPITAL SIGMA not preceded by a letter is unconditionally mapped |
432 | 0 | // to SMALL SIGMA |
433 | 0 | ch = GREEK_SMALL_LETTER_SIGMA; |
434 | 0 | sigmaIndex = uint32_t(-1); |
435 | 0 | } |
436 | 0 | prevIsLetter = true; |
437 | 0 | break; |
438 | 0 | } |
439 | 0 |
|
440 | 0 | // ignore diacritics for the purpose of contextual sigma mapping; |
441 | 0 | // otherwise, reset prevIsLetter appropriately and clear the |
442 | 0 | // sigmaIndex marker |
443 | 0 | if (cat != nsUGenCategory::kMark) { |
444 | 0 | prevIsLetter = (cat == nsUGenCategory::kLetter); |
445 | 0 | sigmaIndex = uint32_t(-1); |
446 | 0 | } |
447 | 0 |
|
// One-to-many lowercase mapping: all mapped units except the last are
// appended here (at most two); the last becomes ch and is appended below.
448 | 0 | mcm = mozilla::unicode::SpecialLower(ch); |
449 | 0 | if (mcm) { |
450 | 0 | int j = 0; |
451 | 0 | while (j < 2 && mcm->mMappedChars[j + 1]) { |
452 | 0 | aConvertedString.Append(mcm->mMappedChars[j]); |
453 | 0 | ++extraChars; |
454 | 0 | ++j; |
455 | 0 | } |
456 | 0 | ch = mcm->mMappedChars[j]; |
457 | 0 | break; |
458 | 0 | } |
459 | 0 |
|
460 | 0 | ch = ToLowerCase(ch); |
461 | 0 | break; |
462 | 0 |
|
463 | 0 | case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: |
464 | 0 | if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { |
465 | 0 | ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; |
466 | 0 | break; |
467 | 0 | } |
468 | 0 |
|
// Greek uppercasing is delegated entirely to GreekCasing::UpperCase(); the
// generic SpecialUpper/ToUpperCase path below is not used for Greek content.
469 | 0 | if (languageSpecificCasing == eLSCB_Greek) { |
470 | 0 | bool markEta; |
471 | 0 | bool updateEta; |
472 | 0 | ch = mozilla::GreekCasing::UpperCase(ch, greekState, |
473 | 0 | markEta, updateEta); |
474 | 0 | if (markEta) { |
475 | 0 | greekMark = aConvertedString.Length(); |
476 | 0 | } else if (updateEta) { |
477 | 0 | // Remove the TONOS from an uppercase ETA-TONOS that turned out |
478 | 0 | // not to be disjunctive-eta. |
479 | 0 | MOZ_ASSERT(aConvertedString.Length() > 0 && |
480 | 0 | greekMark < aConvertedString.Length(), |
481 | 0 | "bad greekMark!"); |
482 | 0 | aConvertedString.SetCharAt(kGreekUpperEta, greekMark); |
483 | 0 | greekMark = uint32_t(-1); |
484 | 0 | } |
485 | 0 | break; |
486 | 0 | } |
487 | 0 |
|
488 | 0 | if (languageSpecificCasing == eLSCB_Irish) { |
489 | 0 | bool mark; |
490 | 0 | uint8_t action; |
491 | 0 | ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); |
492 | 0 | if (mark) { |
493 | 0 | irishMark = aConvertedString.Length(); |
494 | 0 | irishMarkSrc = i; |
495 | 0 | break; |
496 | 0 | } else if (action) { |
497 | 0 | nsString& str = aConvertedString; // shorthand |
498 | 0 | switch (action) { |
499 | 0 | case 1: |
500 | 0 | // lowercase a single prefix letter |
501 | 0 | NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(), |
502 | 0 | "bad irishMark!"); |
503 | 0 | str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); |
504 | 0 | irishMark = uint32_t(-1); |
505 | 0 | irishMarkSrc = uint32_t(-1); |
506 | 0 | break; |
507 | 0 | case 2: |
508 | 0 | // lowercase two prefix letters (immediately before current pos) |
509 | 0 | NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, |
510 | 0 | "bad irishMark!"); |
511 | 0 | str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); |
512 | 0 | str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); |
513 | 0 | irishMark = uint32_t(-1); |
514 | 0 | irishMarkSrc = uint32_t(-1); |
515 | 0 | break; |
516 | 0 | case 3: |
517 | 0 | // lowercase one prefix letter, and delete following hyphen |
518 | 0 | // (which must be the immediately-preceding char) |
519 | 0 | NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, |
520 | 0 | "bad irishMark!"); |
521 | 0 | MOZ_ASSERT(irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1), |
522 | 0 | "failed to set irishMarks"); |
523 | 0 | str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); |
524 | 0 | aDeletedCharsArray[irishMarkSrc + 1] = true; |
525 | 0 | // Remove the trailing entries (corresponding to the deleted hyphen) |
526 | 0 | // from the auxiliary arrays. |
527 | 0 | aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1); |
528 | 0 | if (auxiliaryOutputArrays) { |
529 | 0 | aStyleArray->SetLength(aStyleArray->Length() - 1); |
530 | 0 | aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1); |
531 | 0 | inhibitBreakBefore = true; |
532 | 0 | } |
533 | 0 | mergeNeeded = true; |
534 | 0 | irishMark = uint32_t(-1); |
535 | 0 | irishMarkSrc = uint32_t(-1); |
536 | 0 | break; |
537 | 0 | } |
538 | 0 | // ch has been set to the uppercase for current char; |
539 | 0 | // No need to check for SpecialUpper here as none of the characters |
540 | 0 | // that could trigger an Irish casing action have special mappings. |
541 | 0 | break; |
542 | 0 | } |
543 | 0 | // If we didn't have any special action to perform, fall through |
544 | 0 | // to check for special uppercase (ß) |
545 | 0 | } |
546 | 0 |
|
// One-to-many uppercase mapping, handled the same way as SpecialLower above.
547 | 0 | mcm = mozilla::unicode::SpecialUpper(ch); |
548 | 0 | if (mcm) { |
549 | 0 | int j = 0; |
550 | 0 | while (j < 2 && mcm->mMappedChars[j + 1]) { |
551 | 0 | aConvertedString.Append(mcm->mMappedChars[j]); |
552 | 0 | ++extraChars; |
553 | 0 | ++j; |
554 | 0 | } |
555 | 0 | ch = mcm->mMappedChars[j]; |
556 | 0 | break; |
557 | 0 | } |
558 | 0 |
|
559 | 0 | // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of lack |
560 | 0 | // of widespread font support for the corresponding Mtavruli characters |
561 | 0 | // at this time (July 2018). |
562 | 0 | // This condition is to be removed once the major platforms ship with |
563 | 0 | // fonts that support U+1C90..1CBF. |
564 | 0 | if (ch < 0x10D0 || ch > 0x10FF) { |
565 | 0 | ch = ToUpperCase(ch); |
566 | 0 | } |
567 | 0 | break; |
568 | 0 |
|
// Capitalize only acts when a text run is supplied, because the positions to
// capitalize are read from aTextRun->mCapitalize.
569 | 0 | case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: |
570 | 0 | if (aTextRun) { |
// Second half of a Dutch "ij" whose 'i' was just capitalized.
571 | 0 | if (capitalizeDutchIJ && ch == 'j') { |
572 | 0 | ch = 'J'; |
573 | 0 | capitalizeDutchIJ = false; |
574 | 0 | break; |
575 | 0 | } |
576 | 0 | capitalizeDutchIJ = false; |
577 | 0 | if (aOffsetInTextRun < aTextRun->mCapitalize.Length() && |
578 | 0 | aTextRun->mCapitalize[aOffsetInTextRun]) { |
579 | 0 | if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { |
580 | 0 | ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; |
581 | 0 | break; |
582 | 0 | } |
583 | 0 | if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { |
584 | 0 | ch = 'I'; |
585 | 0 | capitalizeDutchIJ = true; |
586 | 0 | break; |
587 | 0 | } |
588 | 0 | |
589 | 0 | mcm = mozilla::unicode::SpecialTitle(ch); |
590 | 0 | if (mcm) { |
591 | 0 | int j = 0; |
592 | 0 | while (j < 2 && mcm->mMappedChars[j + 1]) { |
593 | 0 | aConvertedString.Append(mcm->mMappedChars[j]); |
594 | 0 | ++extraChars; |
595 | 0 | ++j; |
596 | 0 | } |
597 | 0 | ch = mcm->mMappedChars[j]; |
598 | 0 | break; |
599 | 0 | } |
600 | 0 |
|
601 | 0 | ch = ToTitleCase(ch); |
602 | 0 | } |
603 | 0 | } |
604 | 0 | break; |
605 | 0 |
|
606 | 0 | case NS_STYLE_TEXT_TRANSFORM_FULL_WIDTH: |
607 | 0 | ch = mozilla::unicode::GetFullWidth(ch); |
608 | 0 | break; |
609 | 0 |
|
610 | 0 | default: |
611 | 0 | break; |
612 | 0 | } |
613 | 0 | |
614 | 0 | if (forceNonFullWidth) { |
615 | 0 | ch = mozilla::unicode::GetFullWidthInverse(ch); |
616 | 0 | } |
617 | 0 |
|
// A ch of uint32_t(-1) is treated as "drop this character": it is recorded as
// deleted and nothing is appended to the output. Presumably this value comes
// from the language-specific casing helpers - TODO confirm.
618 | 0 | if (ch == uint32_t(-1)) { |
619 | 0 | aDeletedCharsArray.AppendElement(true); |
620 | 0 | mergeNeeded = true; |
621 | 0 | } else { |
622 | 0 | aDeletedCharsArray.AppendElement(false); |
623 | 0 | aCharsToMergeArray.AppendElement(false); |
624 | 0 | if (auxiliaryOutputArrays) { |
625 | 0 | aStyleArray->AppendElement(charStyle); |
626 | 0 | aCanBreakBeforeArray->AppendElement( |
627 | 0 | inhibitBreakBefore ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE |
628 | 0 | : aTextRun->CanBreakBefore(aOffsetInTextRun)); |
629 | 0 | } |
630 | 0 |
|
631 | 0 | if (IS_IN_BMP(ch)) { |
632 | 0 | aConvertedString.Append(ch); |
633 | 0 | } else { |
// Non-BMP result: emit both surrogates and consume the input low surrogate,
// keeping i and aOffsetInTextRun in step.
634 | 0 | aConvertedString.Append(H_SURROGATE(ch)); |
635 | 0 | aConvertedString.Append(L_SURROGATE(ch)); |
636 | 0 | i++; |
637 | 0 | aOffsetInTextRun++; |
638 | 0 | aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the |
639 | 0 | // trailing surrogate is skipped |
640 | 0 | ++extraChars; |
641 | 0 | } |
642 | 0 |
|
// Every extra output unit for this input character is flagged to merge with
// its predecessor and may not start a line break.
643 | 0 | while (extraChars-- > 0) { |
644 | 0 | mergeNeeded = true; |
645 | 0 | aCharsToMergeArray.AppendElement(true); |
646 | 0 | if (auxiliaryOutputArrays) { |
647 | 0 | aStyleArray->AppendElement(charStyle); |
648 | 0 | aCanBreakBeforeArray->AppendElement( |
649 | 0 | gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE); |
650 | 0 | } |
651 | 0 | } |
652 | 0 | } |
653 | 0 | } |
654 | 0 |
|
655 | 0 | return mergeNeeded; |
656 | 0 | } |
657 | | |
658 | | void |
659 | | nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, |
660 | | DrawTarget* aRefDrawTarget, |
661 | | gfxMissingFontRecorder* aMFR) |
662 | 0 | { |
663 | 0 | nsAutoString convertedString; |
664 | 0 | AutoTArray<bool,50> charsToMergeArray; |
665 | 0 | AutoTArray<bool,50> deletedCharsArray; |
666 | 0 | AutoTArray<uint8_t,50> canBreakBeforeArray; |
667 | 0 | AutoTArray<RefPtr<nsTransformedCharStyle>,50> styleArray; |
668 | 0 |
|
669 | 0 | bool mergeNeeded = TransformString(aTextRun->mString, |
670 | 0 | convertedString, |
671 | 0 | mAllUppercase, |
672 | 0 | nullptr, |
673 | 0 | charsToMergeArray, |
674 | 0 | deletedCharsArray, |
675 | 0 | aTextRun, 0, |
676 | 0 | &canBreakBeforeArray, |
677 | 0 | &styleArray); |
678 | 0 |
|
679 | 0 | gfx::ShapedTextFlags flags; |
680 | 0 | gfxTextRunFactory::Parameters innerParams = |
681 | 0 | GetParametersForInner(aTextRun, &flags, aRefDrawTarget); |
682 | 0 | gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); |
683 | 0 |
|
684 | 0 | RefPtr<nsTransformedTextRun> transformedChild; |
685 | 0 | RefPtr<gfxTextRun> cachedChild; |
686 | 0 | gfxTextRun* child; |
687 | 0 |
|
688 | 0 | if (mInnerTransformingTextRunFactory) { |
689 | 0 | transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( |
690 | 0 | convertedString.BeginReading(), convertedString.Length(), |
691 | 0 | &innerParams, fontGroup, flags, nsTextFrameUtils::Flags(), |
692 | 0 | std::move(styleArray), false); |
693 | 0 | child = transformedChild.get(); |
694 | 0 | } else { |
695 | 0 | cachedChild = fontGroup->MakeTextRun( |
696 | 0 | convertedString.BeginReading(), convertedString.Length(), |
697 | 0 | &innerParams, flags, nsTextFrameUtils::Flags(), aMFR); |
698 | 0 | child = cachedChild.get(); |
699 | 0 | } |
700 | 0 | if (!child) |
701 | 0 | return; |
702 | 0 | // Copy potential linebreaks into child so they're preserved |
703 | 0 | // (and also child will be shaped appropriately) |
704 | 0 | NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), |
705 | 0 | "Dropped characters or break-before values somewhere!"); |
706 | 0 | gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length())); |
707 | 0 | child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements()); |
708 | 0 | if (transformedChild) { |
709 | 0 | transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR); |
710 | 0 | } |
711 | 0 |
|
712 | 0 | if (mergeNeeded) { |
713 | 0 | // Now merge multiple characters into one multi-glyph character as required |
714 | 0 | // and deal with skipping deleted accent chars |
715 | 0 | NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), |
716 | 0 | "source length mismatch"); |
717 | 0 | NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), |
718 | 0 | "destination length mismatch"); |
719 | 0 | MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), |
720 | 0 | deletedCharsArray.Elements()); |
721 | 0 | } else { |
722 | 0 | // No merging to do, so just copy; this produces a more optimized textrun. |
723 | 0 | // We can't steal the data because the child may be cached and stealing |
724 | 0 | // the data would break the cache. |
725 | 0 | aTextRun->ResetGlyphRuns(); |
726 | 0 | aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0); |
727 | 0 | } |
728 | 0 | } |