Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/unicharutil/util/nsUnicodeProperties.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/* vim:set ts=4 sw=4 sts=4 et cindent: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#ifndef NS_UNICODEPROPERTIES_H
8
#define NS_UNICODEPROPERTIES_H
9
10
#include "nsBidiUtils.h"
11
#include "nsUGenCategory.h"
12
#include "nsUnicodeScriptCodes.h"
13
#include "harfbuzz/hb.h"
14
15
#include "unicode/uchar.h"
16
#include "unicode/uscript.h"
17
18
// Returns the nsCharProps2 record for the code point aCh (defined out of
// line). This header reads its mVertOrient and mIdType fields.
const nsCharProps2& GetCharProps2(uint32_t aCh);
19
20
namespace mozilla {
21
22
namespace unicode {
23
24
// Lookup table (defined elsewhere) that GetGenCategory() indexes with the
// value returned by GetGeneralCategory() to obtain an nsUGenCategory.
extern const nsUGenCategory sDetailedToGeneralCategory[];
25
26
/* This MUST match the values assigned by genUnicodePropertyData.pl! */
27
enum VerticalOrientation {
  // Numeric values are what GetVerticalOrientation() casts from
  // nsCharProps2::mVertOrient, so they must not be renumbered.
  VERTICAL_ORIENTATION_U  = 0,
  VERTICAL_ORIENTATION_R  = 1,
  VERTICAL_ORIENTATION_Tu = 2,
  VERTICAL_ORIENTATION_Tr = 3
};
33
34
/* This MUST match the values assigned by genUnicodePropertyData.pl! */
35
enum PairedBracketType {
  // GetPairedBracketType() casts ICU's UCHAR_BIDI_PAIRED_BRACKET_TYPE
  // property value straight to this enum, so the numbers are significant.
  PAIRED_BRACKET_TYPE_NONE  = 0,
  PAIRED_BRACKET_TYPE_OPEN  = 1,
  PAIRED_BRACKET_TYPE_CLOSE = 2
};
40
41
/* Flags for Unicode security IdentifierType.txt attributes. Only a subset
42
   of these are currently checked by Gecko, so we only define flags for the
43
   ones we need. */
44
enum IdentifierType {
  // GetIdentifierType() casts nsCharProps2::mIdType to this enum, so the
  // numeric values must stay as they are.
  IDTYPE_RESTRICTED = 0,
  IDTYPE_ALLOWED    = 1
};
48
49
// Result type of GetEmojiPresentation().
enum EmojiPresentation {
  TextOnly     = 0,  // character does not have the UCHAR_EMOJI property
  TextDefault  = 1,  // UCHAR_EMOJI set, UCHAR_EMOJI_PRESENTATION not set
  EmojiDefault = 2   // both UCHAR_EMOJI and UCHAR_EMOJI_PRESENTATION set
};
54
55
// Code point of variation selector 15 (U+FE0E).
const uint32_t kVariationSelector15 = 0xFE0E; // text presentation
56
// Code point of variation selector 16 (U+FE0F).
const uint32_t kVariationSelector16 = 0xFE0F; // emoji presentation
57
58
// Lookup table (defined elsewhere) that GetGeneralCategory() indexes with
// the result of ICU's u_charType() to get a HarfBuzz general category.
extern const hb_unicode_general_category_t sICUtoHBcategory[];
59
60
inline uint32_t
61
GetMirroredChar(uint32_t aCh)
62
{
63
  return u_charMirror(aCh);
64
}
65
66
inline bool
67
HasMirroredChar(uint32_t aCh)
68
{
69
  return u_isMirrored(aCh);
70
}
71
72
inline uint8_t
73
GetCombiningClass(uint32_t aCh)
74
{
75
  return u_getCombiningClass(aCh);
76
}
77
78
inline uint8_t
79
GetGeneralCategory(uint32_t aCh)
80
{
81
  return sICUtoHBcategory[u_charType(aCh)];
82
}
83
84
// Returns ICU's u_charDirection() value for aCh, cast to nsCharType.
inline nsCharType GetBidiCat(uint32_t aCh)
{
  return static_cast<nsCharType>(u_charDirection(aCh));
}
89
90
inline int8_t
91
GetNumericValue(uint32_t aCh)
92
{
93
  UNumericType type =
94
    UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
95
  return type == U_NT_DECIMAL || type == U_NT_DIGIT
96
         ? int8_t(u_getNumericValue(aCh)) : -1;
97
}
98
99
inline uint8_t
100
GetLineBreakClass(uint32_t aCh)
101
{
102
  return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
103
}
104
105
// Returns the result of ICU's uscript_getScript() for aCh, cast to Script.
// The UErrorCode that ICU fills in is not inspected.
inline Script GetScriptCode(uint32_t aCh)
{
  UErrorCode status = U_ZERO_ERROR;
  const Script script = Script(uscript_getScript(aCh, &status));
  return script;
}
111
112
inline bool
113
HasScript(uint32_t aCh, Script aScript)
114
{
115
  return uscript_hasScript(aCh, UScriptCode(aScript));
116
}
117
118
inline uint32_t
119
GetScriptTagForCode(Script aScriptCode)
120
{
121
  const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
122
  if (tag) {
123
    return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
124
  }
125
  // return UNKNOWN script tag (running with older ICU?)
126
  return HB_SCRIPT_UNKNOWN;
127
}
128
129
// Returns ICU's UCHAR_BIDI_PAIRED_BRACKET_TYPE property value for aCh,
// cast to PairedBracketType.
inline PairedBracketType GetPairedBracketType(uint32_t aCh)
{
  const auto rawType =
    u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE);
  return static_cast<PairedBracketType>(rawType);
}
135
136
inline uint32_t
137
GetPairedBracket(uint32_t aCh)
138
{
139
  return u_getBidiPairedBracket(aCh);
140
}
141
142
inline uint32_t
143
GetUppercase(uint32_t aCh)
144
18.2k
{
145
18.2k
  return u_toupper(aCh);
146
18.2k
}
147
148
inline uint32_t
149
GetLowercase(uint32_t aCh)
150
18.2k
{
151
18.2k
  return u_tolower(aCh);
152
18.2k
}
153
154
inline uint32_t
155
GetTitlecaseForLower(uint32_t aCh) // maps LC to titlecase, UC unchanged
156
0
{
157
0
  return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
158
0
}
159
160
inline uint32_t
161
GetTitlecaseForAll(uint32_t aCh) // maps both UC and LC to titlecase
162
{
163
  return u_totitle(aCh);
164
}
165
166
inline bool
167
IsEastAsianWidthFWH(uint32_t aCh)
168
0
{
169
0
  switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
170
0
    case U_EA_FULLWIDTH:
171
0
    case U_EA_WIDE:
172
0
    case U_EA_HALFWIDTH:
173
0
      return true;
174
0
    case U_EA_AMBIGUOUS:
175
0
    case U_EA_NARROW:
176
0
    case U_EA_NEUTRAL:
177
0
      return false;
178
0
  }
179
0
  return false;
180
0
}
181
182
inline bool
183
IsDefaultIgnorable(uint32_t aCh)
184
{
185
  return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
186
}
187
188
// Classifies aCh by two ICU binary properties:
//   - no UCHAR_EMOJI                          -> TextOnly
//   - UCHAR_EMOJI and UCHAR_EMOJI_PRESENTATION -> EmojiDefault
//   - UCHAR_EMOJI only                        -> TextDefault
inline EmojiPresentation GetEmojiPresentation(uint32_t aCh)
{
  if (u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
    return u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)
           ? EmojiDefault
           : TextDefault;
  }
  return TextOnly;
}
200
201
// returns the simplified Gen Category as defined in nsUGenCategory
202
inline nsUGenCategory GetGenCategory(uint32_t aCh)
{
  // Translate the detailed category into the simplified one via the table.
  const uint8_t detailedCategory = GetGeneralCategory(aCh);
  return sDetailedToGeneralCategory[detailedCategory];
}
205
206
// Returns the mVertOrient field of aCh's nsCharProps2 record, cast to
// VerticalOrientation.
inline VerticalOrientation GetVerticalOrientation(uint32_t aCh)
{
  const nsCharProps2& props = GetCharProps2(aCh);
  return static_cast<VerticalOrientation>(props.mVertOrient);
}
209
210
// Returns the mIdType field of aCh's nsCharProps2 record, cast to
// IdentifierType.
inline IdentifierType GetIdentifierType(uint32_t aCh)
{
  const nsCharProps2& props = GetCharProps2(aCh);
  return static_cast<IdentifierType>(props.mIdType);
}
213
214
// Defined out of line. NOTE(review): presumably maps aCh to its full-width
// form; confirm against the implementation.
uint32_t GetFullWidth(uint32_t aCh);
215
// This is the reverse function of GetFullWidth which guarantees that
216
// for every codepoint c, GetFullWidthInverse(GetFullWidth(c)) == c.
217
// Note that this function does not guarantee conversion of all wide
218
// form characters to their possible narrow form.
219
uint32_t GetFullWidthInverse(uint32_t aCh);
220
221
// Defined out of line. The single-argument overload in this header passes
// GetGeneralCategory(aCh) as aCategory.
bool IsClusterExtender(uint32_t aCh, uint8_t aCategory);
222
223
0
inline bool IsClusterExtender(uint32_t aCh) {
224
0
  return IsClusterExtender(aCh, GetGeneralCategory(aCh));
225
0
}
226
227
// A simple iterator for a string of char16_t codepoints that advances
228
// by Unicode grapheme clusters
229
class ClusterIterator
{
public:
    // Positions the iterator at aText; the end of iteration is
    // aText + aLength.
    ClusterIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText)
        , mLimit(aText + aLength)
#ifdef DEBUG
        , mText(aText)
#endif
    {
    }

    // Implicit conversion that yields the current position in the buffer.
    operator const char16_t* () const { return mPos; }

    // True once the current position is at or beyond the end of the buffer.
    bool AtEnd() const { return !(mPos < mLimit); }

    // Moves the iterator forward; defined out of line.
    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // aText + aLength as passed to the constructor
#ifdef DEBUG
    const char16_t* mText;   // start of the buffer (DEBUG builds only)
#endif
};
256
257
// Count the number of grapheme clusters in the given string
258
// Defined out of line. NOTE(review): aLength is presumably the number of
// char16_t units in aText; confirm against the implementation.
uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
259
260
// A simple reverse iterator for a string of char16_t codepoints that
261
// advances by Unicode grapheme clusters
262
class ClusterReverseIterator
{
public:
    // Positions the iterator at aText + aLength; iteration ends when the
    // position gets back to aText.
    ClusterReverseIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText + aLength)
        , mLimit(aText)
    {
    }

    // Implicit conversion that yields the current position in the buffer.
    operator const char16_t* () const { return mPos; }

    // True once the current position is at or before the start of the buffer.
    bool AtEnd() const { return !(mPos > mLimit); }

    // Moves the iterator; defined out of line.
    void Next();

private:
    const char16_t* mPos;    // current position
    const char16_t* mLimit;  // aText as passed to the constructor
};
283
284
} // end namespace unicode
285
286
} // end namespace mozilla
287
288
#endif /* NS_UNICODEPROPERTIES_H */