/work/obj-fuzz/dist/include/nsBidiUtils.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #ifndef nsBidiUtils_h__ |
7 | | #define nsBidiUtils_h__ |
8 | | |
9 | | #include "nsString.h" |
10 | | |
extern "C" {

/**
 * Externally implemented routine with C linkage; HasRTLChars() in this
 * header forwards to it.
 *
 * @param buffer pointer to the first UTF-16 code unit to inspect
 * @param len    length of the buffer (NOTE(review): presumably a count of
 *               char16_t code units rather than bytes -- confirm against
 *               the implementation)
 * @return true if the buffer is considered to contain bidi text
 */
bool encoding_mem_is_utf16_bidi(const char16_t* buffer, size_t len);

}  // extern "C"
18 | | |
/**
 * Bidirectional character categories.
 *
 * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt, section
 * BIDIRECTIONAL PROPERTIES, for the detailed definition of each category.
 *
 * The numeric values must match the equivalents in %bidicategorycode in
 * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl, and must also
 * match the values used by ICU's UCharDirection, so do not renumber them.
 */
enum nsCharType {
  /* Strong types */
  eCharType_LeftToRight = 0,
  eCharType_RightToLeft = 1,

  /* Numbers, their separators and terminators */
  eCharType_EuropeanNumber = 2,
  eCharType_EuropeanNumberSeparator = 3,
  eCharType_EuropeanNumberTerminator = 4,
  eCharType_ArabicNumber = 5,
  eCharType_CommonNumberSeparator = 6,

  /* Separators and neutrals */
  eCharType_BlockSeparator = 7,
  eCharType_SegmentSeparator = 8,
  eCharType_WhiteSpaceNeutral = 9,
  eCharType_OtherNeutral = 10,

  /* Explicit embeddings/overrides, plus the Arabic strong type (13) */
  eCharType_LeftToRightEmbedding = 11,
  eCharType_LeftToRightOverride = 12,
  eCharType_RightToLeftArabic = 13,
  eCharType_RightToLeftEmbedding = 14,
  eCharType_RightToLeftOverride = 15,
  eCharType_PopDirectionalFormat = 16,

  eCharType_DirNonSpacingMark = 17,
  eCharType_BoundaryNeutral = 18,

  /* Isolates */
  eCharType_FirstStrongIsolate = 19,
  eCharType_LeftToRightIsolate = 20,
  eCharType_RightToLeftIsolate = 21,
  eCharType_PopDirectionalIsolate = 22,

  /* Number of categories above; must stay the last enumerator. */
  eCharType_CharTypeCount
};

/**
 * This specifies the language directional property of a character set.
 * The typedef lets the type be spelled as plain `nsCharType`.
 */
typedef enum nsCharType nsCharType;
60 | | |
/**
 * Find the direction of an embedding level or paragraph level set by
 * the Unicode Bidi Algorithm. (Even levels are left-to-right, odd
 * levels are right-to-left.)
 *
 * Evaluates to true exactly when the lowest bit of `level` is set.
 */
#define IS_LEVEL_RTL(level) (((level) & 1) != 0)
67 | | |
/**
 * Check whether two bidi levels have the same parity and thus the same
 * directionality.
 *
 * Both arguments are fully parenthesized so that compound expressions
 * (e.g. `a | b`) are combined as a whole rather than being re-associated
 * by operator precedence inside the expansion.
 */
#define IS_SAME_DIRECTION(level1, level2) \
  ((((level1) ^ (level2)) & 1) == 0)
73 | | |
/**
 * Convert from nsBidiLevel to nsBidiDirection: odd (right-to-left) levels
 * map to NSBIDI_RTL, even levels to NSBIDI_LTR.
 *
 * NSBIDI_RTL and NSBIDI_LTR are not defined in this header; they must be
 * visible wherever the macro is expanded.
 */
#define DIRECTION_FROM_LEVEL(level) \
  (IS_LEVEL_RTL(level) ? NSBIDI_RTL : NSBIDI_LTR)
79 | | |
/**
 * Definitions of bidirectional character types by category.
 */

/* True for the two strong right-to-left types: RightToLeft and
 * RightToLeftArabic. */
#define CHARTYPE_IS_RTL(val) \
  (((val) == eCharType_RightToLeft) || \
   ((val) == eCharType_RightToLeftArabic))
85 | | |
/* True when `val` is EuropeanNumberSeparator, EuropeanNumberTerminator, or
 * any type numerically greater than ArabicNumber other than
 * RightToLeftArabic. EuropeanNumber and ArabicNumber themselves evaluate to
 * false.
 * NOTE(review): this set is wider than the "weak" types of the Unicode Bidi
 * Algorithm (it also admits separators, neutrals and explicit formatting
 * types) -- confirm with callers before relying on the name. */
#define CHARTYPE_IS_WEAK(val) \
  (((val) == eCharType_EuropeanNumberSeparator) || \
   ((val) == eCharType_EuropeanNumberTerminator) || \
   (((val) > eCharType_ArabicNumber) && \
    ((val) != eCharType_RightToLeftArabic)))
89 | | |
/**
 * Inspects a single UTF-16 code unit and returns it with digits converted
 * to Arabic or Hindi forms as requested.
 *
 * @param aChar           the character to inspect
 * @param aPrevCharArabic true if the previous character in the string is an
 *                        Arabic character
 * @param aNumFlag        the conversion to perform:
 *   - IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
 *   - IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
 *   - IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
 *   - IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi,
 *                                   otherwise to Arabic
 * @return the converted character
 */
char16_t HandleNumberInChar(char16_t aChar,
                            bool aPrevCharArabic,
                            uint32_t aNumFlag);
102 | | |
103 | | /** |
104 | | * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place |
105 | | * @param aBuffer is the string |
106 | | * @param aSize is the size of aBuffer |
107 | | * @param aNumFlag specifies the conversion to perform: |
108 | | * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion |
109 | | * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) |
110 | | * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) |
111 | | * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic |
112 | | */ |
113 | | nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); |
114 | | |
/* Code points of the bidi control characters tested below. */
#define LRM_CHAR 0x200e  // left-to-right mark
#define RLM_CHAR 0x200f  // right-to-left mark

#define LRE_CHAR 0x202a  // left-to-right embedding
#define RLE_CHAR 0x202b  // right-to-left embedding
#define PDF_CHAR 0x202c  // pop directional format
#define LRO_CHAR 0x202d  // left-to-right override
#define RLO_CHAR 0x202e  // right-to-left override

#define LRI_CHAR 0x2066  // left-to-right isolate
#define RLI_CHAR 0x2067  // right-to-left isolate
#define FSI_CHAR 0x2068  // first strong isolate
#define PDI_CHAR 0x2069  // pop directional isolate

#define ALM_CHAR 0x061C  // Arabic letter mark

/**
 * Given a UTF-32 codepoint, return true if the codepoint is a Bidi control
 * character (LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
 * Return false otherwise.
 *
 * LRM and RLM are compared explicitly. A partial bit mask would ignore the
 * high bits of the 32-bit argument and wrongly accept out-of-range values
 * such as 0x0100200E.
 */
inline bool IsBidiControl(uint32_t aChar) {
  return (LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
         (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
         aChar == ALM_CHAR ||
         aChar == LRM_CHAR ||
         aChar == RLM_CHAR;
}
142 | | |
143 | | /** |
144 | | * Give a UTF-32 codepoint |
145 | | * Return true if the codepoint is a Bidi control character that may result |
146 | | * in RTL directionality and therefore needs to trigger bidi resolution; |
147 | | * return false otherwise. |
148 | | */ |
149 | 0 | inline bool IsBidiControlRTL(uint32_t aChar) { |
150 | 0 | return aChar == RLM_CHAR || |
151 | 0 | aChar == RLE_CHAR || |
152 | 0 | aChar == RLO_CHAR || |
153 | 0 | aChar == RLI_CHAR || |
154 | 0 | aChar == ALM_CHAR; |
155 | 0 | } |
156 | | |
157 | | /** |
158 | | * Give a 16-bit (UTF-16) text buffer |
159 | | * @return true if the string contains right-to-left characters |
160 | | */ |
161 | 0 | inline bool HasRTLChars(mozilla::Span<const char16_t> aBuffer) { |
162 | 0 | // Span ensures we never pass a nullptr to Rust--even if the |
163 | 0 | // length of the buffer is zero. |
164 | 0 | return encoding_mem_is_utf16_bidi(aBuffer.Elements(), aBuffer.Length()); |
165 | 0 | } |
166 | | |
// Preference names for the bidi options.
//
// These values are shared with the Preferences dialog: if the pref values
// are changed in the XUL file of Prefs, they must be changed here too.
#define IBMBIDI_TEXTDIRECTION_STR "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR      "bidi.texttype"
#define IBMBIDI_NUMERAL_STR       "bidi.numeral"
177 | | |
// Values accepted by each bidi preference. Entries marked with '*' are the
// ones combined into IBMBIDI_DEFAULT_BIDI_OPTIONS.

// ---- Text Direction (bidi.direction) ----
#define IBMBIDI_TEXTDIRECTION_LTR 1  // directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL 2  // directionRTLBidi

// ---- Text Type (bidi.texttype) ----
#define IBMBIDI_TEXTTYPE_CHARSET 1  // charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL 2  // logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL  3  // visualtexttypeBidi

// ---- Numeral Style (bidi.numeral) ----
#define IBMBIDI_NUMERAL_NOMINAL        0  // nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR        1  // regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT   2  // hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC         3  // arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI          4  // hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5  // persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN        6  // persiannumeralBidi
202 | | |
// Default packed bidi options: LTR direction in bits 0-3, charset text type
// in bits 4-7 and nominal numerals in bits 8-11.
#define IBMBIDI_DEFAULT_BIDI_OPTIONS \
  ((IBMBIDI_TEXTDIRECTION_LTR) | \
   (IBMBIDI_TEXTTYPE_CHARSET << 4) | \
   (IBMBIDI_NUMERAL_NOMINAL << 8))
207 | | |
// Extract one 4-bit field from a packed bidi options value.
#define GET_BIDI_OPTION_DIRECTION(bo) ((bo) & 0xF)        /* bits 0-3: DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo) >> 4) & 0xF)  /* bits 4-7: TEXTTYPE */
#define GET_BIDI_OPTION_NUMERAL(bo) (((bo) >> 8) & 0xF)   /* bits 8-11: NUMERAL */
211 | | |
// Overwrite one 4-bit field of a packed bidi options lvalue `bo`, leaving
// the other bits untouched. Only the low 4 bits of the new value are used.
// Each macro expands to a brace-enclosed block (not an expression), and
// `bo` is evaluated twice.
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
  { (bo) = ((bo) & 0xFFFFFFF0) | ((dir) & 0xF); }
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
  { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0xF) << 4); }
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
  { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0xF) << 8); }
215 | | |
/* Constants related to the position of numerics in the codepage:
 * first and last code point of each digit range. */
#define START_HINDI_DIGITS  0x0660
#define END_HINDI_DIGITS    0x0669
#define START_ARABIC_DIGITS 0x0030
#define END_ARABIC_DIGITS   0x0039
#define START_FARSI_DIGITS  0x06f0
#define END_FARSI_DIGITS    0x06f9

/* Range tests (inclusive at both ends). The argument is evaluated twice. */
#define IS_HINDI_DIGIT(u) \
  (((u) >= START_HINDI_DIGITS) && ((u) <= END_HINDI_DIGITS))
#define IS_ARABIC_DIGIT(u) \
  (((u) >= START_ARABIC_DIGITS) && ((u) <= END_ARABIC_DIGITS))
#define IS_FARSI_DIGIT(u) \
  (((u) >= START_FARSI_DIGITS) && ((u) <= END_FARSI_DIGITS))
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * The lower bound of the first range is checked explicitly so that the
 * macro is also correct when used on its own: without it every value at or
 * below U+0603 (including all of ASCII) would be reported as an Arabic
 * separator. The argument may be evaluated several times.
 */
#define IS_ARABIC_SEPARATOR(u) \
  (((u) >= 0x0600 && (u) <= 0x0603) || \
   ((u) >= 0x066A && (u) <= 0x066C) || \
   ((u) == 0x06DD))
240 | | |
/* True when `u` falls in one of the code point ranges or single values
 * listed below (all within U+0591..U+06ED). The argument is evaluated
 * multiple times. */
#define IS_BIDI_DIACRITIC(u) \
  (((u) >= 0x0591 && (u) <= 0x05A1) || \
   ((u) >= 0x05A3 && (u) <= 0x05B9) || \
   ((u) >= 0x05BB && (u) <= 0x05BD) || \
   ((u) == 0x05BF) || \
   ((u) == 0x05C1) || \
   ((u) == 0x05C2) || \
   ((u) == 0x05C4) || \
   ((u) >= 0x064B && (u) <= 0x0652) || \
   ((u) == 0x0670) || \
   ((u) >= 0x06D7 && (u) <= 0x06E4) || \
   ((u) == 0x06E7) || \
   ((u) == 0x06E8) || \
   ((u) >= 0x06EA && (u) <= 0x06ED))
248 | | |
/* True for U+0590..U+05FF and for U+FB1D..U+FB4F (both ranges inclusive).
 * The argument is evaluated multiple times. */
#define IS_HEBREW_CHAR(c) \
  ((((c) >= 0x0590) && ((c) <= 0x05FF)) || \
   (((c) >= 0xFB1D) && ((c) <= 0xFB4F)))
/* True for U+0600..U+06FF, U+0750..U+077F and U+08A0..U+08FF (all ranges
 * inclusive); the code points lying between those ranges are excluded.
 * The argument is evaluated multiple times. */
#define IS_ARABIC_CHAR(c) \
  (((c) >= 0x0600 && (c) <= 0x06FF) || \
   ((c) >= 0x0750 && (c) <= 0x077F) || \
   ((c) >= 0x08A0 && (c) <= 0x08FF))
/* True for an Arabic character (per IS_ARABIC_CHAR) that is neither a Hindi
 * digit, a Farsi digit, nor an Arabic separator. */
#define IS_ARABIC_ALPHABETIC(c) \
  (IS_ARABIC_CHAR(c) && \
   !IS_HINDI_DIGIT(c) && \
   !IS_FARSI_DIGIT(c) && \
   !IS_ARABIC_SEPARATOR(c))
256 | | |
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 */

/* True for U+0590..U+08FF inclusive. */
#define IS_IN_BMP_RTL_BLOCK(c) (((c) >= 0x0590) && ((c) <= 0x08FF))
/* True for U+FB1D..U+FDFF and for U+FE70..U+FEFC (both ranges inclusive). */
#define IS_RTL_PRESENTATION_FORM(c) \
  ((((c) >= 0xFB1D) && ((c) <= 0xFDFF)) || \
   (((c) >= 0xFE70) && ((c) <= 0xFEFC)))
/* True for U+10800..U+10FFF and for U+1E800..U+1EFFF (both ranges
 * inclusive). */
#define IS_IN_SMP_RTL_BLOCK(c) \
  ((((c) >= 0x10800) && ((c) <= 0x10FFF)) || \
   (((c) >= 0x1E800) && ((c) <= 0x1EFFF)))
// Per-code-unit RTL check for UTF-16 text.
//
// Due to the supplementary-plane RTL blocks being identifiable from the
// high surrogate without examining the low surrogate, it is correct to
// use this by-code-unit check on potentially astral text without doing
// the math to decode surrogate pairs into code points. The four high
// surrogates tested (0xD802, 0xD803, 0xD83A, 0xD83B) are the ones that lead
// the code points accepted by IS_IN_SMP_RTL_BLOCK.
//
// However, an unpaired high surrogate that is one of these RTL high
// surrogates then counts as RTL even though, if replaced by the REPLACEMENT
// CHARACTER, it would not be RTL.
#define UTF16_CODE_UNIT_IS_BIDI(c) \
  (IS_IN_BMP_RTL_BLOCK(c) || \
   IS_RTL_PRESENTATION_FORM(c) || \
   (c) == 0xD802 || (c) == 0xD803 || \
   (c) == 0xD83A || (c) == 0xD83B)
// Per-code-point RTL check for UTF-32 values: true when the code point lies
// in the BMP RTL block, the RTL presentation forms, or an SMP RTL block.
#define UTF32_CHAR_IS_BIDI(c) \
  (IS_IN_BMP_RTL_BLOCK(c) || \
   IS_RTL_PRESENTATION_FORM(c) || \
   IS_IN_SMP_RTL_BLOCK(c))
285 | | #endif /* nsBidiUtils_h__ */ |