/work/obj-fuzz/dist/include/nsUnicodeProperties.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* vim:set ts=4 sw=4 sts=4 et cindent: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #ifndef NS_UNICODEPROPERTIES_H |
8 | | #define NS_UNICODEPROPERTIES_H |
9 | | |
10 | | #include "nsBidiUtils.h" |
11 | | #include "nsUGenCategory.h" |
12 | | #include "nsUnicodeScriptCodes.h" |
13 | | #include "harfbuzz/hb.h" |
14 | | |
15 | | #include "unicode/uchar.h" |
16 | | #include "unicode/uscript.h" |
17 | | |
18 | | const nsCharProps2& GetCharProps2(uint32_t aCh); |
19 | | |
20 | | namespace mozilla { |
21 | | |
22 | | namespace unicode { |
23 | | |
24 | | extern const nsUGenCategory sDetailedToGeneralCategory[]; |
25 | | |
/* Vertical orientation classes.
   The numeric values MUST match the ones assigned by
   genUnicodePropertyData.pl! */
enum VerticalOrientation {
    VERTICAL_ORIENTATION_U  = 0,
    VERTICAL_ORIENTATION_R  = 1,
    VERTICAL_ORIENTATION_Tu = 2,
    VERTICAL_ORIENTATION_Tr = 3
};
33 | | |
/* Paired bracket classes.
   The numeric values MUST match the ones assigned by
   genUnicodePropertyData.pl! */
enum PairedBracketType {
    PAIRED_BRACKET_TYPE_NONE  = 0,
    PAIRED_BRACKET_TYPE_OPEN  = 1,
    PAIRED_BRACKET_TYPE_CLOSE = 2
};
40 | | |
/* Flags for the attributes listed in the Unicode security data file
   IdentifierType.txt. Gecko currently checks only a subset of them, so
   flags are defined just for the ones that are needed. */
enum IdentifierType {
    IDTYPE_RESTRICTED = 0,
    IDTYPE_ALLOWED    = 1
};
48 | | |
/* Presentation classes returned by GetEmojiPresentation(). */
enum EmojiPresentation {
    TextOnly     = 0,
    TextDefault  = 1,
    EmojiDefault = 2
};
54 | | |
// Variation selector requesting text presentation.
const uint32_t kVariationSelector15 = 0xFE0E;
// Variation selector requesting emoji presentation.
const uint32_t kVariationSelector16 = 0xFE0F;
57 | | |
58 | | extern const hb_unicode_general_category_t sICUtoHBcategory[]; |
59 | | |
60 | | inline uint32_t |
61 | | GetMirroredChar(uint32_t aCh) |
62 | 0 | { |
63 | 0 | return u_charMirror(aCh); |
64 | 0 | } |
65 | | |
66 | | inline bool |
67 | | HasMirroredChar(uint32_t aCh) |
68 | 0 | { |
69 | 0 | return u_isMirrored(aCh); |
70 | 0 | } |
71 | | |
72 | | inline uint8_t |
73 | | GetCombiningClass(uint32_t aCh) |
74 | 0 | { |
75 | 0 | return u_getCombiningClass(aCh); |
76 | 0 | } |
77 | | |
78 | | inline uint8_t |
79 | | GetGeneralCategory(uint32_t aCh) |
80 | 85.0k | { |
81 | 85.0k | return sICUtoHBcategory[u_charType(aCh)]; |
82 | 85.0k | } |
83 | | |
84 | | inline nsCharType |
85 | | GetBidiCat(uint32_t aCh) |
86 | 0 | { |
87 | 0 | return nsCharType(u_charDirection(aCh)); |
88 | 0 | } |
89 | | |
90 | | inline int8_t |
91 | | GetNumericValue(uint32_t aCh) |
92 | 17.0k | { |
93 | 17.0k | UNumericType type = |
94 | 17.0k | UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE)); |
95 | 17.0k | return type == U_NT_DECIMAL || type == U_NT_DIGIT |
96 | 17.0k | ? int8_t(u_getNumericValue(aCh)) : -1; |
97 | 17.0k | } |
98 | | |
99 | | inline uint8_t |
100 | | GetLineBreakClass(uint32_t aCh) |
101 | 0 | { |
102 | 0 | return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK); |
103 | 0 | } |
104 | | |
105 | | inline Script |
106 | | GetScriptCode(uint32_t aCh) |
107 | | { |
108 | | UErrorCode err = U_ZERO_ERROR; |
109 | | return Script(uscript_getScript(aCh, &err)); |
110 | | } |
111 | | |
112 | | inline bool |
113 | | HasScript(uint32_t aCh, Script aScript) |
114 | 0 | { |
115 | 0 | return uscript_hasScript(aCh, UScriptCode(aScript)); |
116 | 0 | } |
117 | | |
118 | | inline uint32_t |
119 | | GetScriptTagForCode(Script aScriptCode) |
120 | 0 | { |
121 | 0 | const char* tag = uscript_getShortName(UScriptCode(aScriptCode)); |
122 | 0 | if (tag) { |
123 | 0 | return HB_TAG(tag[0], tag[1], tag[2], tag[3]); |
124 | 0 | } |
125 | 0 | // return UNKNOWN script tag (running with older ICU?) |
126 | 0 | return HB_SCRIPT_UNKNOWN; |
127 | 0 | } |
128 | | |
129 | | inline PairedBracketType |
130 | | GetPairedBracketType(uint32_t aCh) |
131 | 0 | { |
132 | 0 | return PairedBracketType |
133 | 0 | (u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE)); |
134 | 0 | } |
135 | | |
136 | | inline uint32_t |
137 | | GetPairedBracket(uint32_t aCh) |
138 | 0 | { |
139 | 0 | return u_getBidiPairedBracket(aCh); |
140 | 0 | } |
141 | | |
142 | | inline uint32_t |
143 | | GetUppercase(uint32_t aCh) |
144 | | { |
145 | | return u_toupper(aCh); |
146 | | } |
147 | | |
148 | | inline uint32_t |
149 | | GetLowercase(uint32_t aCh) |
150 | | { |
151 | | return u_tolower(aCh); |
152 | | } |
153 | | |
154 | | inline uint32_t |
155 | | GetTitlecaseForLower(uint32_t aCh) // maps LC to titlecase, UC unchanged |
156 | | { |
157 | | return u_isULowercase(aCh) ? u_totitle(aCh) : aCh; |
158 | | } |
159 | | |
160 | | inline uint32_t |
161 | | GetTitlecaseForAll(uint32_t aCh) // maps both UC and LC to titlecase |
162 | 0 | { |
163 | 0 | return u_totitle(aCh); |
164 | 0 | } |
165 | | |
166 | | inline bool |
167 | | IsEastAsianWidthFWH(uint32_t aCh) |
168 | | { |
169 | | switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) { |
170 | | case U_EA_FULLWIDTH: |
171 | | case U_EA_WIDE: |
172 | | case U_EA_HALFWIDTH: |
173 | | return true; |
174 | | case U_EA_AMBIGUOUS: |
175 | | case U_EA_NARROW: |
176 | | case U_EA_NEUTRAL: |
177 | | return false; |
178 | | } |
179 | | return false; |
180 | | } |
181 | | |
182 | | inline bool |
183 | | IsDefaultIgnorable(uint32_t aCh) |
184 | 0 | { |
185 | 0 | return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT); |
186 | 0 | } |
187 | | |
188 | | inline EmojiPresentation |
189 | | GetEmojiPresentation(uint32_t aCh) |
190 | 0 | { |
191 | 0 | if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) { |
192 | 0 | return TextOnly; |
193 | 0 | } |
194 | 0 | |
195 | 0 | if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) { |
196 | 0 | return EmojiDefault; |
197 | 0 | } |
198 | 0 | return TextDefault; |
199 | 0 | } |
200 | | |
201 | | // returns the simplified Gen Category as defined in nsUGenCategory |
202 | 0 | inline nsUGenCategory GetGenCategory(uint32_t aCh) { |
203 | 0 | return sDetailedToGeneralCategory[GetGeneralCategory(aCh)]; |
204 | 0 | } |
205 | | |
206 | 0 | inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) { |
207 | 0 | return VerticalOrientation(GetCharProps2(aCh).mVertOrient); |
208 | 0 | } |
209 | | |
210 | 88.5k | inline IdentifierType GetIdentifierType(uint32_t aCh) { |
211 | 88.5k | return IdentifierType(GetCharProps2(aCh).mIdType); |
212 | 88.5k | } |
213 | | |
214 | | uint32_t GetFullWidth(uint32_t aCh); |
// This is the reverse function of GetFullWidth, which guarantees that
// for every codepoint c, GetFullWidthInverse(GetFullWidth(c)) == c.
// Note that this function is not guaranteed to convert every wide-form
// character to its possible narrow form.
219 | | uint32_t GetFullWidthInverse(uint32_t aCh); |
220 | | |
221 | | bool IsClusterExtender(uint32_t aCh, uint8_t aCategory); |
222 | | |
223 | | inline bool IsClusterExtender(uint32_t aCh) { |
224 | | return IsClusterExtender(aCh, GetGeneralCategory(aCh)); |
225 | | } |
226 | | |
// A simple forward iterator over a string of char16_t code units that
// advances by Unicode grapheme clusters. Next() is defined out of line.
class ClusterIterator
{
public:
    // Begins iteration at aText; the range ends aLength code units later.
    ClusterIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText)
        , mLimit(aText + aLength)
#ifdef DEBUG
        , mText(aText)
#endif
    {
    }

    // Implicit conversion yielding the current position in the text.
    operator const char16_t* () const { return mPos; }

    // True once the current position has reached (or passed) the limit.
    bool AtEnd() const { return !(mPos < mLimit); }

    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // one past the last code unit
#ifdef DEBUG
    const char16_t* mText;   // start of the text (DEBUG builds only)
#endif
};
256 | | |
257 | | // Count the number of grapheme clusters in the given string |
258 | | uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength); |
259 | | |
// A simple reverse iterator over a string of char16_t code units that
// advances by Unicode grapheme clusters. Next() is defined out of line.
class ClusterReverseIterator
{
public:
    // Begins iteration aLength code units past aText; aText is the limit.
    ClusterReverseIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText + aLength)
        , mLimit(aText)
    {
    }

    // Implicit conversion yielding the current position in the text.
    operator const char16_t* () const { return mPos; }

    // True once the current position has reached (or passed) the limit.
    bool AtEnd() const { return !(mPos > mLimit); }

    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // start of the text
};
283 | | |
284 | | } // end namespace unicode |
285 | | |
286 | | } // end namespace mozilla |
287 | | |
288 | | #endif /* NS_UNICODEPROPERTIES_H */ |