Coverage Report

Created: 2018-09-25 14:53

/work/obj-fuzz/dist/include/nsBidiUtils.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#ifndef nsBidiUtils_h__
7
#define nsBidiUtils_h__
8
9
#include "nsString.h"
10
11
extern "C" {
12
13
bool
14
encoding_mem_is_utf16_bidi(char16_t const* buffer,
15
                           size_t len);
16
17
}
18
19
   /**
20
    *  Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
21
    *  section BIDIRECTIONAL PROPERTIES
22
    *  for the detailed definition of the following categories
23
    *
24
    *  The values here must match the equivalents in %bidicategorycode in
25
    *  mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
26
    *  and must also match the values used by ICU's UCharDirection.
27
    */
28
29
enum nsCharType   {
30
  eCharType_LeftToRight              = 0,
31
  eCharType_RightToLeft              = 1,
32
  eCharType_EuropeanNumber           = 2,
33
  eCharType_EuropeanNumberSeparator  = 3,
34
  eCharType_EuropeanNumberTerminator = 4,
35
  eCharType_ArabicNumber             = 5,
36
  eCharType_CommonNumberSeparator    = 6,
37
  eCharType_BlockSeparator           = 7,
38
  eCharType_SegmentSeparator         = 8,
39
  eCharType_WhiteSpaceNeutral        = 9,
40
  eCharType_OtherNeutral             = 10,
41
  eCharType_LeftToRightEmbedding     = 11,
42
  eCharType_LeftToRightOverride      = 12,
43
  eCharType_RightToLeftArabic        = 13,
44
  eCharType_RightToLeftEmbedding     = 14,
45
  eCharType_RightToLeftOverride      = 15,
46
  eCharType_PopDirectionalFormat     = 16,
47
  eCharType_DirNonSpacingMark        = 17,
48
  eCharType_BoundaryNeutral          = 18,
49
  eCharType_FirstStrongIsolate       = 19,
50
  eCharType_LeftToRightIsolate       = 20,
51
  eCharType_RightToLeftIsolate       = 21,
52
  eCharType_PopDirectionalIsolate    = 22,
53
  eCharType_CharTypeCount
54
};
55
56
/**
57
 * This specifies the language directional property of a character set.
58
 */
59
typedef enum nsCharType nsCharType;
60
61
/**
62
 * Find the direction of an embedding level or paragraph level set by
63
 * the Unicode Bidi Algorithm. (Even levels are left-to-right, odd
64
 * levels right-to-left.)
65
 */
66
0
#define IS_LEVEL_RTL(level) (((level) & 1) != 0) /* odd level (lowest bit set) => right-to-left */
67
68
/**
69
 * Check whether two bidi levels have the same parity and thus the same
70
 * directionality
71
 */
72
0
#define IS_SAME_DIRECTION(level1, level2) ((((level1) ^ (level2)) & 1) == 0) /* args parenthesized so expression arguments keep their own precedence */
73
74
/**
75
 * Convert from nsBidiLevel to nsBidiDirection
76
 */
77
0
#define DIRECTION_FROM_LEVEL(level) ((IS_LEVEL_RTL(level)) \
78
0
   ? NSBIDI_RTL : NSBIDI_LTR)
79
80
/**
81
 * Definitions of bidirectional character types by category
82
 */
83
84
0
#define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) )
85
86
0
#define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator)    \
87
0
                           || ( (val) == eCharType_EuropeanNumberTerminator) \
88
0
                           || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) )
89
90
  /**
91
   * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
92
   * @param aChar is the character
93
   * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
94
   * @param aNumFlag specifies the conversion to perform:
95
   *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
96
   *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
97
   *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
98
   *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
99
   * @return the converted Unichar
100
   */
101
  char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
102
103
  /**
104
   * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
105
   * @param aBuffer is the string
106
   * @param aSize is the size of aBuffer
107
   * @param aNumFlag specifies the conversion to perform:
108
   *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
109
   *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
110
   *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
111
   *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
112
   */
113
  nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t  aNumFlag);
114
115
  /**
116
   * Given a UTF-32 codepoint,
117
   * return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
118
   * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
119
   * Return false, otherwise
120
   */
121
0
#define LRM_CHAR 0x200e
122
#define RLM_CHAR 0x200f
123
124
0
#define LRE_CHAR 0x202a
125
#define RLE_CHAR 0x202b
126
#define PDF_CHAR 0x202c
127
#define LRO_CHAR 0x202d
128
0
#define RLO_CHAR 0x202e
129
130
0
#define LRI_CHAR 0x2066
131
#define RLI_CHAR 0x2067
132
#define FSI_CHAR 0x2068
133
0
#define PDI_CHAR 0x2069
134
135
0
#define ALM_CHAR 0x061C
136
0
  inline bool IsBidiControl(uint32_t aChar) {
137
0
    return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
138
0
            (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
139
0
            (aChar == ALM_CHAR) ||
140
0
            (aChar & 0xfffffe) == LRM_CHAR); // bit 0 is masked off, so this matches both LRM (0x200e) and RLM (0x200f). NOTE(review): the mask is only 24 bits wide, so bits 24-31 of aChar are ignored here -- confirm callers only pass valid code points.
141
0
  }
142
143
  /**
144
   * Given a UTF-32 codepoint,
145
   * Return true if the codepoint is a Bidi control character that may result
146
   * in RTL directionality and therefore needs to trigger bidi resolution;
147
   * return false otherwise.
148
   */
149
0
  inline bool IsBidiControlRTL(uint32_t aChar) {
150
0
    return aChar == RLM_CHAR ||
151
0
           aChar == RLE_CHAR ||
152
0
           aChar == RLO_CHAR ||
153
0
           aChar == RLI_CHAR ||
154
0
           aChar == ALM_CHAR;
155
0
  }
156
157
  /**
158
   * Given a 16-bit (UTF-16) text buffer
159
   * @return true if the string contains right-to-left characters
160
   */
161
0
  inline bool HasRTLChars(mozilla::Span<const char16_t> aBuffer) {
162
0
    // Span ensures we never pass a nullptr to Rust--even if the
163
0
    // length of the buffer is zero.
164
0
    return encoding_mem_is_utf16_bidi(aBuffer.Elements(), aBuffer.Length());
165
0
  }
166
167
// These values are shared with Preferences dialog
168
//  ------------------
169
//  If Pref values are to be changed
170
//  in the XUL file of Prefs, the values
171
//  must be changed here too.
172
//  ------------------
173
//
174
0
#define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
175
0
#define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
176
0
#define IBMBIDI_NUMERAL_STR             "bidi.numeral"
177
178
//  ------------------
179
//  Text Direction
180
//  ------------------
181
//  bidi.direction
182
0
#define IBMBIDI_TEXTDIRECTION_LTR     1 //  1 = directionLTRBidi *
183
0
#define IBMBIDI_TEXTDIRECTION_RTL     2 //  2 = directionRTLBidi
184
//  ------------------
185
//  Text Type
186
//  ------------------
187
//  bidi.texttype
188
0
#define IBMBIDI_TEXTTYPE_CHARSET      1 //  1 = charsettexttypeBidi *
189
0
#define IBMBIDI_TEXTTYPE_LOGICAL      2 //  2 = logicaltexttypeBidi
190
0
#define IBMBIDI_TEXTTYPE_VISUAL       3 //  3 = visualtexttypeBidi
191
//  ------------------
192
//  Numeral Style
193
//  ------------------
194
//  bidi.numeral
195
0
#define IBMBIDI_NUMERAL_NOMINAL       0 //  0 = nominalnumeralBidi *
196
0
#define IBMBIDI_NUMERAL_REGULAR       1 //  1 = regularcontextnumeralBidi
197
0
#define IBMBIDI_NUMERAL_HINDICONTEXT  2 //  2 = hindicontextnumeralBidi
198
0
#define IBMBIDI_NUMERAL_ARABIC        3 //  3 = arabicnumeralBidi
199
0
#define IBMBIDI_NUMERAL_HINDI         4 //  4 = hindinumeralBidi
200
0
#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi
201
0
#define IBMBIDI_NUMERAL_PERSIAN       6 //  6 = persiannumeralBidi
202
203
#define IBMBIDI_DEFAULT_BIDI_OPTIONS              \
204
        ((IBMBIDI_TEXTDIRECTION_LTR<<0)         | \
205
         (IBMBIDI_TEXTTYPE_CHARSET<<4)          | \
206
         (IBMBIDI_NUMERAL_NOMINAL<<8))
207
208
0
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* 4 bits for DIRECTION */
209
0
#define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo)>>4) & 0x0000000F) /* 4 bits for TEXTTYPE */
210
0
#define GET_BIDI_OPTION_NUMERAL(bo) (((bo)>>8) & 0x0000000F) /* 4 bits for NUMERAL */
211
212
0
#define SET_BIDI_OPTION_DIRECTION(bo, dir) do { (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); } while (0) /* stores dir in bits 0-3 of bo; do/while(0) makes it a single statement, safe before `else` */
213
0
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) do { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); } while (0) /* stores tt in bits 4-7 of bo; do/while(0) makes it a single statement, safe before `else` */
214
0
#define SET_BIDI_OPTION_NUMERAL(bo, num) do { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); } while (0) /* stores num in bits 8-11 of bo; do/while(0) makes it a single statement, safe before `else` */
215
216
/* Constants related to the position of numerics in the codepage */
217
0
#define START_HINDI_DIGITS              0x0660
218
0
#define END_HINDI_DIGITS                0x0669
219
0
#define START_ARABIC_DIGITS             0x0030
220
0
#define END_ARABIC_DIGITS               0x0039
221
0
#define START_FARSI_DIGITS              0x06f0
222
0
#define END_FARSI_DIGITS                0x06f9
223
0
#define IS_HINDI_DIGIT(u)   ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
224
0
#define IS_ARABIC_DIGIT(u)  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
225
0
#define IS_FARSI_DIGIT(u)  ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) )
226
/**
227
 * Arabic numeric separator and numeric formatting characters:
228
 *  U+0600;ARABIC NUMBER SIGN
229
 *  U+0601;ARABIC SIGN SANAH
230
 *  U+0602;ARABIC FOOTNOTE MARKER
231
 *  U+0603;ARABIC SIGN SAFHA
232
 *  U+066A;ARABIC PERCENT SIGN
233
 *  U+066B;ARABIC DECIMAL SEPARATOR
234
 *  U+066C;ARABIC THOUSANDS SEPARATOR
235
 *  U+06DD;ARABIC END OF AYAH
236
 */
237
0
#define IS_ARABIC_SEPARATOR(u) ( ( /*(u) >= 0x0600 &&*/ (u) <= 0x0603 ) || \
238
0
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
239
0
                                 ( (u) == 0x06DD ) )
240
241
#define IS_BIDI_DIACRITIC(u) ( \
242
  ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
243
    || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
244
    || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
245
    || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
246
    || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
247
    || ( (u) >= 0x06EA && (u) <= 0x06ED) )
248
249
0
#define IS_HEBREW_CHAR(c) ((((c) >= 0x0590) && ((c) <= 0x05ff)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f))) /* U+0590..U+05FF or U+FB1D..U+FB4F */
250
0
#define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) &&   \
251
0
                            ( (c) <= 0x06ff ||                    \
252
0
                              ((c) >= 0x0750 && (c) <= 0x077f) || \
253
0
                              (c) >= 0x08a0 ) )
254
0
#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
255
0
                                !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
256
257
/**
258
 * The codepoint ranges in the following macros are based on the blocks
259
 *  allocated, or planned to be allocated, to right-to-left characters in the
260
 *  BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
261
 *  according to
262
 *  http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
263
 *  http://www.unicode.org/roadmaps/
264
 */
265
266
0
#define IS_IN_BMP_RTL_BLOCK(c) (((c) >= 0x0590) && ((c) <= 0x08ff)) /* inclusive range U+0590..U+08FF */
267
0
#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
268
0
                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
269
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
270
                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
271
// Due to the supplementary-plane RTL blocks being identifiable from the
272
// high surrogate without examining the low surrogate, it is correct to
273
// use this by-code-unit check on potentially astral text without doing
274
// the math to decode surrogate pairs into code points. However, unpaired
275
// high surrogates that are RTL high surrogates then count as RTL even
276
// though, if replaced by the REPLACEMENT CHARACTER, it would not be
277
// RTL.
278
0
#define UTF16_CODE_UNIT_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
279
0
                                    (IS_RTL_PRESENTATION_FORM(c)) || \
280
0
                                    (c) == 0xD802 || (c) == 0xD803 || \
281
0
                                    (c) == 0xD83A || (c) == 0xD83B)
282
#define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) || \
283
                               (IS_RTL_PRESENTATION_FORM(c)) || \
284
                               (IS_IN_SMP_RTL_BLOCK(c)))
285
#endif  /* nsBidiUtils_h__ */