/src/skia/modules/skunicode/include/SkUnicode.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2020 Google LLC |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license that can be |
5 | | * found in the LICENSE file. |
6 | | */ |
7 | | #ifndef SkUnicode_DEFINED |
8 | | #define SkUnicode_DEFINED |
9 | | #include "include/core/SkRefCnt.h" |
10 | | #include "include/core/SkSpan.h" |
11 | | #include "include/core/SkString.h" |
12 | | #include "include/core/SkTypes.h" |
13 | | #include "include/private/base/SkTArray.h" |
14 | | #include "include/private/base/SkTo.h" |
15 | | #include "src/base/SkUTF.h" |
16 | | #include <cstddef> |
17 | | #include <cstdint> |
18 | | #include <memory> |
19 | | #include <string> |
20 | | #include <vector> |
21 | | namespace sknonstd { template <typename T> struct is_bitmask_enum; } |
22 | | |
23 | | #if !defined(SKUNICODE_IMPLEMENTATION) |
24 | | #define SKUNICODE_IMPLEMENTATION 0 |
25 | | #endif |
26 | | |
27 | | #if !defined(SKUNICODE_API) |
28 | | #if defined(SKUNICODE_DLL) |
29 | | #if defined(_MSC_VER) |
30 | | #if SKUNICODE_IMPLEMENTATION |
31 | | #define SKUNICODE_API __declspec(dllexport) |
32 | | #else |
33 | | #define SKUNICODE_API __declspec(dllimport) |
34 | | #endif |
35 | | #else |
36 | | #define SKUNICODE_API __attribute__((visibility("default"))) |
37 | | #endif |
38 | | #else |
39 | | #define SKUNICODE_API |
40 | | #endif |
41 | | #endif |
42 | | |
43 | | class SKUNICODE_API SkBidiIterator { |
44 | | public: |
45 | | typedef int32_t Position; |
46 | | typedef uint8_t Level; |
47 | | struct Region { |
48 | | Region(Position start, Position end, Level level) |
49 | 0 | : start(start), end(end), level(level) { } |
50 | | Position start; |
51 | | Position end; |
52 | | Level level; |
53 | | }; |
54 | | enum Direction { |
55 | | kLTR, |
56 | | kRTL, |
57 | | }; |
58 | 0 | virtual ~SkBidiIterator() = default; |
59 | | virtual Position getLength() = 0; |
60 | | virtual Level getLevelAt(Position) = 0; |
61 | | }; |
62 | | |
63 | | class SKUNICODE_API SkBreakIterator { |
64 | | public: |
65 | | typedef int32_t Position; |
66 | | typedef int32_t Status; |
67 | 0 | virtual ~SkBreakIterator() = default; |
68 | | virtual Position first() = 0; |
69 | | virtual Position current() = 0; |
70 | | virtual Position next() = 0; |
71 | | virtual Status status() = 0; |
72 | | virtual bool isDone() = 0; |
73 | | virtual bool setText(const char utftext8[], int utf8Units) = 0; |
74 | | virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; |
75 | | }; |
76 | | |
77 | | class SKUNICODE_API SkUnicode : public SkRefCnt { |
78 | | public: |
79 | | enum CodeUnitFlags { |
80 | | kNoCodeUnitFlag = 0x00, |
81 | | kPartOfWhiteSpaceBreak = 0x01, |
82 | | kGraphemeStart = 0x02, |
83 | | kSoftLineBreakBefore = 0x04, |
84 | | kHardLineBreakBefore = 0x08, |
85 | | kPartOfIntraWordBreak = 0x10, |
86 | | kControl = 0x20, |
87 | | kTabulation = 0x40, |
88 | | kGlyphClusterStart = 0x80, |
89 | | kIdeographic = 0x100, |
90 | | kEmoji = 0x200, |
91 | | kWordBreak = 0x400, |
92 | | kSentenceBreak = 0x800, |
93 | | }; |
94 | | enum class TextDirection { |
95 | | kLTR, |
96 | | kRTL, |
97 | | }; |
98 | | typedef size_t Position; |
99 | | typedef uint8_t BidiLevel; |
100 | | struct BidiRegion { |
101 | | BidiRegion(Position start, Position end, BidiLevel level) |
102 | 0 | : start(start), end(end), level(level) { } |
103 | | Position start; |
104 | | Position end; |
105 | | BidiLevel level; |
106 | | }; |
107 | | enum class LineBreakType { |
108 | | kSoftLineBreak = 0, |
109 | | kHardLineBreak = 100, |
110 | | }; |
111 | | |
112 | | enum class BreakType { kWords, kGraphemes, kLines, kSentences }; |
113 | | struct LineBreakBefore { |
114 | | LineBreakBefore(Position pos, LineBreakType breakType) |
115 | 0 | : pos(pos), breakType(breakType) { } |
116 | | Position pos; |
117 | | LineBreakType breakType; |
118 | | }; |
119 | | |
120 | | ~SkUnicode() override = default; |
121 | | |
122 | | // deprecated |
123 | | virtual SkString toUpper(const SkString&) = 0; |
124 | | virtual SkString toUpper(const SkString&, const char* locale) = 0; |
125 | | |
126 | | virtual bool isControl(SkUnichar utf8) = 0; |
127 | | virtual bool isWhitespace(SkUnichar utf8) = 0; |
128 | | virtual bool isSpace(SkUnichar utf8) = 0; |
129 | | virtual bool isTabulation(SkUnichar utf8) = 0; |
130 | | virtual bool isHardBreak(SkUnichar utf8) = 0; |
131 | | /** |
132 | | * Returns if a code point may start an emoji sequence. |
133 | | * Returns true for '#', '*', and '0'-'9' since they may start an emoji sequence. |
134 | | * To determine if a list of code points begins with an emoji sequence, use |
135 | | * getEmojiSequence. |
136 | | **/ |
137 | | virtual bool isEmoji(SkUnichar utf8) = 0; |
138 | | virtual bool isEmojiComponent(SkUnichar utf8) = 0; |
139 | | virtual bool isEmojiModifierBase(SkUnichar utf8) = 0; |
140 | | virtual bool isEmojiModifier(SkUnichar utf8) = 0; |
141 | | virtual bool isRegionalIndicator(SkUnichar utf8) = 0; |
142 | | virtual bool isIdeographic(SkUnichar utf8) = 0; |
143 | | |
144 | | // Methods used in SkShaper and SkText |
145 | | virtual std::unique_ptr<SkBidiIterator> makeBidiIterator |
146 | | (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; |
147 | | virtual std::unique_ptr<SkBidiIterator> makeBidiIterator |
148 | | (const char text[], int count, SkBidiIterator::Direction) = 0; |
149 | | virtual std::unique_ptr<SkBreakIterator> makeBreakIterator |
150 | | (const char locale[], BreakType breakType) = 0; |
151 | | virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0; |
152 | | |
153 | | // Methods used in SkParagraph |
154 | | static bool hasTabulationFlag(SkUnicode::CodeUnitFlags flags); |
155 | | static bool hasHardLineBreakFlag(SkUnicode::CodeUnitFlags flags); |
156 | | static bool hasSoftLineBreakFlag(SkUnicode::CodeUnitFlags flags); |
157 | | static bool hasGraphemeStartFlag(SkUnicode::CodeUnitFlags flags); |
158 | | static bool hasControlFlag(SkUnicode::CodeUnitFlags flags); |
159 | | static bool hasPartOfWhiteSpaceBreakFlag(SkUnicode::CodeUnitFlags flags); |
160 | | |
161 | | static bool extractBidi(const char utf8[], |
162 | | int utf8Units, |
163 | | TextDirection dir, |
164 | | std::vector<BidiRegion>* bidiRegions); |
165 | | virtual bool getBidiRegions(const char utf8[], |
166 | | int utf8Units, |
167 | | TextDirection dir, |
168 | | std::vector<BidiRegion>* results) = 0; |
169 | | // Returns results in utf16 |
170 | | virtual bool getWords(const char utf8[], int utf8Units, const char* locale, |
171 | | std::vector<Position>* results) = 0; |
172 | | virtual bool getUtf8Words(const char utf8[], |
173 | | int utf8Units, |
174 | | const char* locale, |
175 | | std::vector<Position>* results) = 0; |
176 | | virtual bool getSentences(const char utf8[], |
177 | | int utf8Units, |
178 | | const char* locale, |
179 | | std::vector<Position>* results) = 0; |
180 | | virtual bool computeCodeUnitFlags( |
181 | | char utf8[], int utf8Units, bool replaceTabs, |
182 | | skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; |
183 | | virtual bool computeCodeUnitFlags( |
184 | | char16_t utf16[], int utf16Units, bool replaceTabs, |
185 | | skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) = 0; |
186 | | |
187 | | static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); |
188 | | static SkString convertUtf16ToUtf8(const std::u16string& utf16); |
189 | | static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); |
190 | | static std::u16string convertUtf8ToUtf16(const SkString& utf8); |
191 | | |
192 | | template <typename Appender8, typename Appender16> |
193 | 0 | static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) { |
194 | 0 | size_t size8 = 0; |
195 | 0 | size_t size16 = 0; |
196 | 0 | auto ptr = utf8.begin(); |
197 | 0 | auto end = utf8.end(); |
198 | 0 | while (ptr < end) { |
199 | | |
200 | 0 | size_t index = SkToSizeT(ptr - utf8.begin()); |
201 | 0 | SkUnichar u = SkUTF::NextUTF8(&ptr, end); |
202 | | |
203 | | // All UTF8 code units refer to the same codepoint |
204 | 0 | size_t next = SkToSizeT(ptr - utf8.begin()); |
205 | 0 | for (auto i = index; i < next; ++i) { |
206 | | //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); |
207 | 0 | appender16(size8); |
208 | 0 | ++size16; |
209 | 0 | } |
210 | | //SkASSERT(fUTF16IndexForUTF8Index.size() == next); |
211 | 0 | SkASSERT(size16 == next); |
212 | 0 | if (size16 != next) { |
213 | 0 | return false; |
214 | 0 | } |
215 | | |
216 | | // One or two UTF16 code units refer to the same codepoint |
217 | 0 | uint16_t buffer[2]; |
218 | 0 | size_t count = SkUTF::ToUTF16(u, buffer); |
219 | | //fUTF8IndexForUTF16Index.emplace_back(index); |
220 | 0 | appender8(index); |
221 | 0 | ++size8; |
222 | 0 | if (count > 1) { |
223 | | //fUTF8IndexForUTF16Index.emplace_back(index); |
224 | 0 | appender8(index); |
225 | 0 | ++size8; |
226 | 0 | } |
227 | 0 | } |
228 | | //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); |
229 | 0 | appender16(size8); |
230 | 0 | ++size16; |
231 | | //fUTF8IndexForUTF16Index.emplace_back(fText.size()); |
232 | 0 | appender8(utf8.size()); |
233 | 0 | ++size8; |
234 | | |
235 | 0 | return true; |
236 | 0 | } Unexecuted instantiation: bool SkUnicode::extractUtfConversionMapping<SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#1}, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#2}>(SkSpan<char const>, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#1}&&, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#2}&&) Unexecuted instantiation: bool SkUnicode::extractUtfConversionMapping<SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#1}, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#2}>(SkSpan<char const>, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#1}&&, SkUnicode_icu::getUtf8Words(char const*, int, char const*, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >*)::{lambda(unsigned long)#2}&&) Unexecuted instantiation: ParagraphBuilderImpl.cpp:bool SkUnicode::extractUtfConversionMapping<skia::textlayout::ParagraphBuilderImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#1}, skia::textlayout::ParagraphBuilderImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#2}>(SkSpan<char const>, skia::textlayout::ParagraphBuilderImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#1}&&, 
skia::textlayout::ParagraphBuilderImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#2}&&) Unexecuted instantiation: ParagraphImpl.cpp:bool SkUnicode::extractUtfConversionMapping<skia::textlayout::ParagraphImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#1}, skia::textlayout::ParagraphImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#2}>(SkSpan<char const>, skia::textlayout::ParagraphImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#1}&&, skia::textlayout::ParagraphImpl::ensureUTF16Mapping()::$_0::operator()() const::{lambda(unsigned long)#2}&&) |
237 | | |
238 | | template <typename Callback> |
239 | 0 | void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { |
240 | 0 | const char* current = utf8; |
241 | 0 | const char* end = utf8 + utf8Units; |
242 | 0 | while (current < end) { |
243 | 0 | auto before = current - utf8; |
244 | 0 | SkUnichar unichar = SkUTF::NextUTF8(&current, end); |
245 | 0 | if (unichar < 0) unichar = 0xFFFD; |
246 | 0 | auto after = current - utf8; |
247 | 0 | uint16_t buffer[2]; |
248 | 0 | size_t count = SkUTF::ToUTF16(unichar, buffer); |
249 | 0 | callback(unichar, before, after, count); |
250 | 0 | } |
251 | 0 | } Unexecuted instantiation: ParagraphImpl.cpp:void SkUnicode::forEachCodepoint<skia::textlayout::ParagraphImpl::addUnresolvedCodepoints(skia::textlayout::SkRange<unsigned long>)::$_0>(char const*, int, skia::textlayout::ParagraphImpl::addUnresolvedCodepoints(skia::textlayout::SkRange<unsigned long>)::$_0&&) Unexecuted instantiation: ParagraphImpl.cpp:void SkUnicode::forEachCodepoint<skia::textlayout::ParagraphImpl::containsEmoji(SkTextBlob*)::$_0>(char const*, int, skia::textlayout::ParagraphImpl::containsEmoji(SkTextBlob*)::$_0&&) |
252 | | |
253 | | template <typename Callback> |
254 | 0 | void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { |
255 | 0 | const char16_t* current = utf16; |
256 | 0 | const char16_t* end = utf16 + utf16Units; |
257 | 0 | while (current < end) { |
258 | 0 | auto before = current - utf16; |
259 | 0 | SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end); |
260 | 0 | auto after = current - utf16; |
261 | 0 | callback(unichar, before, after); |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | | template <typename Callback> |
266 | | void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { |
267 | | auto iter = makeBidiIterator(utf16, utf16Units, dir); |
268 | | const uint16_t* start16 = utf16; |
269 | | const uint16_t* end16 = utf16 + utf16Units; |
270 | | SkBidiIterator::Level currentLevel = 0; |
271 | | |
272 | | SkBidiIterator::Position pos16 = 0; |
273 | | while (pos16 <= iter->getLength()) { |
274 | | auto level = iter->getLevelAt(pos16); |
275 | | if (pos16 == 0) { |
276 | | currentLevel = level; |
277 | | } else if (level != currentLevel) { |
278 | | callback(pos16, start16 - utf16, currentLevel); |
279 | | currentLevel = level; |
280 | | } |
281 | | if (start16 == end16) { |
282 | | break; |
283 | | } |
284 | | SkUnichar u = SkUTF::NextUTF16(&start16, end16); |
285 | | pos16 += SkUTF::ToUTF16(u); |
286 | | } |
287 | | } |
288 | | |
289 | | template <typename Callback> |
290 | 0 | void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { |
291 | 0 | auto iter = makeBreakIterator(type); |
292 | 0 | iter->setText(utf16, utf16Units); |
293 | 0 | auto pos = iter->first(); |
294 | 0 | do { |
295 | 0 | callback(pos, iter->status()); |
296 | 0 | pos = iter->next(); |
297 | 0 | } while (!iter->isDone()); |
298 | 0 | } Unexecuted instantiation: void SkUnicode::forEachBreak<SkUnicode_icu::computeCodeUnitFlags(char16_t*, int, bool, skia_private::TArray<SkUnicode::CodeUnitFlags, true>*)::{lambda(int, int)#1}>(char16_t const*, int, SkUnicode::BreakType, SkUnicode_icu::computeCodeUnitFlags(char16_t*, int, bool, skia_private::TArray<SkUnicode::CodeUnitFlags, true>*)::{lambda(int, int)#1}&&) Unexecuted instantiation: void SkUnicode::forEachBreak<SkUnicode_icu::computeCodeUnitFlags(char16_t*, int, bool, skia_private::TArray<SkUnicode::CodeUnitFlags, true>*)::{lambda(int, int)#2}>(char16_t const*, int, SkUnicode::BreakType, SkUnicode_icu::computeCodeUnitFlags(char16_t*, int, bool, skia_private::TArray<SkUnicode::CodeUnitFlags, true>*)::{lambda(int, int)#2}&&) |
299 | | |
300 | | virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; |
301 | | }; |
302 | | |
303 | | namespace sknonstd { |
304 | | template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {}; |
305 | | } // namespace sknonstd |
306 | | |
307 | | #endif // SkUnicode_DEFINED |