/src/mozilla-central/intl/unicharutil/util/nsUnicharUtils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #include "nsUnicharUtils.h" |
7 | | #include "nsUTF8Utils.h" |
8 | | #include "nsUnicodeProperties.h" |
9 | | #include "mozilla/Likely.h" |
10 | | #include "mozilla/HashFunctions.h" |
11 | | |
// ASCII lower-casing table: entry i holds the lower-case form of code point i.
// Only 'A'..'Z' (0x41..0x5a) are remapped, to 'a'..'z' (0x61..0x7a); every
// other entry maps to itself.
static const uint8_t gASCIIToLower[128] = {
  // 0x00 - 0x1f
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  // 0x20 - 0x3f
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  // 0x40 - 0x5f: the upper-case letters are folded onto 0x61 - 0x7a
  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  // 0x60 - 0x7f
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
};
24 | | |
25 | | // We want ToLowerCase(uint32_t) and ToLowerCaseASCII(uint32_t) to be fast |
26 | | // when they're called from within the case-insensitive comparators, so we |
27 | | // define inlined versions. |
28 | | static MOZ_ALWAYS_INLINE uint32_t |
29 | | ToLowerCase_inline(uint32_t aChar) |
30 | 8.96M | { |
31 | 8.96M | if (IS_ASCII(aChar)) { |
32 | 8.94M | return gASCIIToLower[aChar]; |
33 | 8.94M | } |
34 | 18.2k | |
35 | 18.2k | return mozilla::unicode::GetLowercase(aChar); |
36 | 18.2k | } |
37 | | |
38 | | static MOZ_ALWAYS_INLINE uint32_t |
39 | | ToLowerCaseASCII_inline(const uint32_t aChar) |
40 | 43.2k | { |
41 | 43.2k | if (IS_ASCII(aChar)) { |
42 | 42.6k | return gASCIIToLower[aChar]; |
43 | 42.6k | } |
44 | 562 | |
45 | 562 | return aChar; |
46 | 562 | } |
47 | | |
48 | | void |
49 | | ToLowerCase(nsAString& aString) |
50 | 233k | { |
51 | 233k | char16_t *buf = aString.BeginWriting(); |
52 | 233k | ToLowerCase(buf, buf, aString.Length()); |
53 | 233k | } |
54 | | |
55 | | void |
56 | | ToLowerCaseASCII(nsAString& aString) |
57 | 0 | { |
58 | 0 | char16_t *buf = aString.BeginWriting(); |
59 | 0 | ToLowerCaseASCII(buf, buf, aString.Length()); |
60 | 0 | } |
61 | | |
// Returns the lower-case form of aChar when it is in 'A'..'Z'; any other
// value is returned unchanged.
char
ToLowerCaseASCII(char aChar)
{
  const bool isUpper = ('A' <= aChar) && (aChar <= 'Z');
  return isUpper ? static_cast<char>(aChar + 0x20) : aChar;
}
70 | | |
// UTF-16 code unit variant: 'A'..'Z' are shifted down to 'a'..'z', all other
// code units are returned unchanged.
char16_t
ToLowerCaseASCII(char16_t aChar)
{
  const bool isUpper = ('A' <= aChar) && (aChar <= 'Z');
  return isUpper ? static_cast<char16_t>(aChar + 0x20) : aChar;
}
79 | | |
// UTF-32 variant: 'A'..'Z' are shifted down to 'a'..'z', all other values are
// returned unchanged.
char32_t
ToLowerCaseASCII(char32_t aChar)
{
  const bool isUpper = ('A' <= aChar) && (aChar <= 'Z');
  return isUpper ? static_cast<char32_t>(aChar + 0x20) : aChar;
}
88 | | |
// Returns the upper-case form of aChar when it is in 'a'..'z'; any other
// value is returned unchanged.
char
ToUpperCaseASCII(char aChar)
{
  const bool isLower = ('a' <= aChar) && (aChar <= 'z');
  return isLower ? static_cast<char>(aChar - 0x20) : aChar;
}
97 | | |
// UTF-16 code unit variant: 'a'..'z' are shifted up to 'A'..'Z', all other
// code units are returned unchanged.
char16_t
ToUpperCaseASCII(char16_t aChar)
{
  const bool isLower = ('a' <= aChar) && (aChar <= 'z');
  return isLower ? static_cast<char16_t>(aChar - 0x20) : aChar;
}
106 | | |
// UTF-32 variant: 'a'..'z' are shifted up to 'A'..'Z', all other values are
// returned unchanged.
char32_t
ToUpperCaseASCII(char32_t aChar)
{
  const bool isLower = ('a' <= aChar) && (aChar <= 'z');
  return isLower ? static_cast<char32_t>(aChar - 0x20) : aChar;
}
115 | | |
116 | | void |
117 | | ToLowerCase(const nsAString& aSource, |
118 | | nsAString& aDest) |
119 | 176k | { |
120 | 176k | const char16_t *in = aSource.BeginReading(); |
121 | 176k | uint32_t len = aSource.Length(); |
122 | 176k | |
123 | 176k | aDest.SetLength(len); |
124 | 176k | char16_t *out = aDest.BeginWriting(); |
125 | 176k | |
126 | 176k | ToLowerCase(in, out, len); |
127 | 176k | } |
128 | | |
129 | | void |
130 | | ToLowerCaseASCII(const nsAString& aSource, |
131 | | nsAString& aDest) |
132 | 0 | { |
133 | 0 | const char16_t *in = aSource.BeginReading(); |
134 | 0 | uint32_t len = aSource.Length(); |
135 | 0 |
|
136 | 0 | aDest.SetLength(len); |
137 | 0 | char16_t *out = aDest.BeginWriting(); |
138 | 0 |
|
139 | 0 | ToLowerCaseASCII(in, out, len); |
140 | 0 | } |
141 | | |
142 | | uint32_t |
143 | | ToLowerCaseASCII(const uint32_t aChar) |
144 | 0 | { |
145 | 0 | return ToLowerCaseASCII_inline(aChar); |
146 | 0 | } |
147 | | |
148 | | void |
149 | | ToUpperCase(nsAString& aString) |
150 | 0 | { |
151 | 0 | char16_t *buf = aString.BeginWriting(); |
152 | 0 | ToUpperCase(buf, buf, aString.Length()); |
153 | 0 | } |
154 | | |
155 | | void |
156 | | ToUpperCase(const nsAString& aSource, |
157 | | nsAString& aDest) |
158 | 0 | { |
159 | 0 | const char16_t *in = aSource.BeginReading(); |
160 | 0 | uint32_t len = aSource.Length(); |
161 | 0 |
|
162 | 0 | aDest.SetLength(len); |
163 | 0 | char16_t *out = aDest.BeginWriting(); |
164 | 0 |
|
165 | 0 | ToUpperCase(in, out, len); |
166 | 0 | } |
167 | | |
168 | | #ifdef MOZILLA_INTERNAL_API |
169 | | |
170 | | int32_t |
171 | | nsCaseInsensitiveStringComparator::operator()(const char16_t* lhs, |
172 | | const char16_t* rhs, |
173 | | uint32_t lLength, |
174 | | uint32_t rLength) const |
175 | 0 | { |
176 | 0 | return (lLength == rLength) ? CaseInsensitiveCompare(lhs, rhs, lLength) : |
177 | 0 | (lLength > rLength) ? 1 : -1; |
178 | 0 | } |
179 | | |
180 | | int32_t |
181 | | nsCaseInsensitiveUTF8StringComparator::operator()(const char* lhs, |
182 | | const char* rhs, |
183 | | uint32_t lLength, |
184 | | uint32_t rLength) const |
185 | 0 | { |
186 | 0 | return CaseInsensitiveCompare(lhs, rhs, lLength, rLength); |
187 | 0 | } |
188 | | |
189 | | int32_t |
190 | | nsASCIICaseInsensitiveStringComparator::operator()(const char16_t* lhs, |
191 | | const char16_t* rhs, |
192 | | uint32_t lLength, |
193 | | uint32_t rLength) const |
194 | 22.9k | { |
195 | 22.9k | if (lLength != rLength) { |
196 | 0 | if (lLength > rLength) |
197 | 0 | return 1; |
198 | 0 | return -1; |
199 | 0 | } |
200 | 22.9k | |
201 | 35.7k | while (rLength) { |
202 | 34.4k | // we don't care about surrogates here, because we're only |
203 | 34.4k | // lowercasing the ASCII range |
204 | 34.4k | char16_t l = *lhs++; |
205 | 34.4k | char16_t r = *rhs++; |
206 | 34.4k | if (l != r) { |
207 | 21.6k | l = ToLowerCaseASCII_inline(l); |
208 | 21.6k | r = ToLowerCaseASCII_inline(r); |
209 | 21.6k | |
210 | 21.6k | if (l > r) |
211 | 20.8k | return 1; |
212 | 746 | else if (r > l) |
213 | 745 | return -1; |
214 | 12.8k | } |
215 | 12.8k | rLength--; |
216 | 12.8k | } |
217 | 22.9k | |
218 | 22.9k | return 0; |
219 | 22.9k | } |
220 | | |
221 | | #endif // MOZILLA_INTERNAL_API |
222 | | |
223 | | uint32_t |
224 | | ToLowerCase(uint32_t aChar) |
225 | 8.96M | { |
226 | 8.96M | return ToLowerCase_inline(aChar); |
227 | 8.96M | } |
228 | | |
229 | | void |
230 | | ToLowerCase(const char16_t *aIn, char16_t *aOut, uint32_t aLen) |
231 | 410k | { |
232 | 1.17M | for (uint32_t i = 0; i < aLen; i++) { |
233 | 763k | uint32_t ch = aIn[i]; |
234 | 763k | if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && |
235 | 763k | NS_IS_LOW_SURROGATE(aIn[i + 1])) { |
236 | 0 | ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); |
237 | 0 | NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); |
238 | 0 | aOut[i++] = H_SURROGATE(ch); |
239 | 0 | aOut[i] = L_SURROGATE(ch); |
240 | 0 | continue; |
241 | 0 | } |
242 | 763k | aOut[i] = ToLowerCase(ch); |
243 | 763k | } |
244 | 410k | } |
245 | | |
246 | | void |
247 | | ToLowerCaseASCII(const char16_t *aIn, char16_t *aOut, uint32_t aLen) |
248 | 0 | { |
249 | 0 | for (uint32_t i = 0; i < aLen; i++) { |
250 | 0 | char16_t ch = aIn[i]; |
251 | 0 | aOut[i] = IS_ASCII_UPPER(ch) ? (ch + 0x20) : ch; |
252 | 0 | } |
253 | 0 | } |
254 | | |
255 | | uint32_t |
256 | | ToUpperCase(uint32_t aChar) |
257 | 8.20M | { |
258 | 8.20M | if (IS_ASCII(aChar)) { |
259 | 8.18M | if (IS_ASCII_LOWER(aChar)) { |
260 | 63.5k | return aChar - 0x20; |
261 | 63.5k | } |
262 | 8.12M | return aChar; |
263 | 8.12M | } |
264 | 18.2k | |
265 | 18.2k | return mozilla::unicode::GetUppercase(aChar); |
266 | 18.2k | } |
267 | | |
268 | | void |
269 | | ToUpperCase(const char16_t *aIn, char16_t *aOut, uint32_t aLen) |
270 | 0 | { |
271 | 0 | for (uint32_t i = 0; i < aLen; i++) { |
272 | 0 | uint32_t ch = aIn[i]; |
273 | 0 | if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && |
274 | 0 | NS_IS_LOW_SURROGATE(aIn[i + 1])) { |
275 | 0 | ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); |
276 | 0 | NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); |
277 | 0 | aOut[i++] = H_SURROGATE(ch); |
278 | 0 | aOut[i] = L_SURROGATE(ch); |
279 | 0 | continue; |
280 | 0 | } |
281 | 0 | aOut[i] = ToUpperCase(ch); |
282 | 0 | } |
283 | 0 | } |
284 | | |
285 | | uint32_t |
286 | | ToTitleCase(uint32_t aChar) |
287 | 0 | { |
288 | 0 | if (IS_ASCII(aChar)) { |
289 | 0 | return ToUpperCase(aChar); |
290 | 0 | } |
291 | 0 | |
292 | 0 | return mozilla::unicode::GetTitlecaseForLower(aChar); |
293 | 0 | } |
294 | | |
295 | | int32_t |
296 | | CaseInsensitiveCompare(const char16_t *a, |
297 | | const char16_t *b, |
298 | | uint32_t len) |
299 | 0 | { |
300 | 0 | NS_ASSERTION(a && b, "Do not pass in invalid pointers!"); |
301 | 0 |
|
302 | 0 | if (len) { |
303 | 0 | do { |
304 | 0 | uint32_t c1 = *a++; |
305 | 0 | uint32_t c2 = *b++; |
306 | 0 |
|
307 | 0 | // Unfortunately, we need to check for surrogates BEFORE we check |
308 | 0 | // for equality, because we could have identical high surrogates |
309 | 0 | // but non-identical characters, so we can't just skip them |
310 | 0 |
|
311 | 0 | // If c1 isn't a surrogate, we don't bother to check c2; |
312 | 0 | // in the case where it _is_ a surrogate, we're definitely going to get |
313 | 0 | // a mismatch, and don't need to interpret and lowercase it |
314 | 0 |
|
315 | 0 | if (NS_IS_HIGH_SURROGATE(c1) && len > 1 && NS_IS_LOW_SURROGATE(*a)) { |
316 | 0 | c1 = SURROGATE_TO_UCS4(c1, *a++); |
317 | 0 | if (NS_IS_HIGH_SURROGATE(c2) && NS_IS_LOW_SURROGATE(*b)) { |
318 | 0 | c2 = SURROGATE_TO_UCS4(c2, *b++); |
319 | 0 | } |
320 | 0 | // If c2 wasn't a surrogate, decrementing len means we'd stop |
321 | 0 | // short of the end of string b, but that doesn't actually matter |
322 | 0 | // because we're going to find a mismatch and return early |
323 | 0 | --len; |
324 | 0 | } |
325 | 0 |
|
326 | 0 | if (c1 != c2) { |
327 | 0 | c1 = ToLowerCase_inline(c1); |
328 | 0 | c2 = ToLowerCase_inline(c2); |
329 | 0 | if (c1 != c2) { |
330 | 0 | if (c1 < c2) { |
331 | 0 | return -1; |
332 | 0 | } |
333 | 0 | return 1; |
334 | 0 | } |
335 | 0 | } |
336 | 0 | } while (--len != 0); |
337 | 0 | } |
338 | 0 | return 0; |
339 | 0 | } |
340 | | |
341 | | // Inlined definition of GetLowerUTF8Codepoint, which we use because we want |
342 | | // to be fast when called from the case-insensitive comparators. |
343 | | static MOZ_ALWAYS_INLINE uint32_t |
344 | | GetLowerUTF8Codepoint_inline(const char* aStr, |
345 | | const char* aEnd, |
346 | | const char **aNext) |
347 | 0 | { |
348 | 0 | // Convert to unsigned char so that stuffing chars into PRUint32s doesn't |
349 | 0 | // sign extend. |
350 | 0 | const unsigned char *str = (unsigned char*)aStr; |
351 | 0 |
|
352 | 0 | if (UTF8traits::isASCII(str[0])) { |
353 | 0 | // It's ASCII; just convert to lower-case and return it. |
354 | 0 | *aNext = aStr + 1; |
355 | 0 | return gASCIIToLower[*str]; |
356 | 0 | } |
357 | 0 | if (UTF8traits::is2byte(str[0]) && MOZ_LIKELY(aStr + 1 < aEnd)) { |
358 | 0 | // It's a two-byte sequence, so it looks like |
359 | 0 | // 110XXXXX 10XXXXXX. |
360 | 0 | // This is definitely in the BMP, so we can store straightaway into a |
361 | 0 | // uint16_t. |
362 | 0 |
|
363 | 0 | uint16_t c; |
364 | 0 | c = (str[0] & 0x1F) << 6; |
365 | 0 | c += (str[1] & 0x3F); |
366 | 0 |
|
367 | 0 | // we don't go through ToLowerCase here, because we know this isn't |
368 | 0 | // an ASCII character so the ASCII fast-path there is useless |
369 | 0 | c = mozilla::unicode::GetLowercase(c); |
370 | 0 |
|
371 | 0 | *aNext = aStr + 2; |
372 | 0 | return c; |
373 | 0 | } |
374 | 0 | if (UTF8traits::is3byte(str[0]) && MOZ_LIKELY(aStr + 2 < aEnd)) { |
375 | 0 | // It's a three-byte sequence, so it looks like |
376 | 0 | // 1110XXXX 10XXXXXX 10XXXXXX. |
377 | 0 | // This will just barely fit into 16-bits, so store into a uint16_t. |
378 | 0 |
|
379 | 0 | uint16_t c; |
380 | 0 | c = (str[0] & 0x0F) << 12; |
381 | 0 | c += (str[1] & 0x3F) << 6; |
382 | 0 | c += (str[2] & 0x3F); |
383 | 0 |
|
384 | 0 | c = mozilla::unicode::GetLowercase(c); |
385 | 0 |
|
386 | 0 | *aNext = aStr + 3; |
387 | 0 | return c; |
388 | 0 | } |
389 | 0 | if (UTF8traits::is4byte(str[0]) && MOZ_LIKELY(aStr + 3 < aEnd)) { |
390 | 0 | // It's a four-byte sequence, so it looks like |
391 | 0 | // 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX. |
392 | 0 |
|
393 | 0 | uint32_t c; |
394 | 0 | c = (str[0] & 0x07) << 18; |
395 | 0 | c += (str[1] & 0x3F) << 12; |
396 | 0 | c += (str[2] & 0x3F) << 6; |
397 | 0 | c += (str[3] & 0x3F); |
398 | 0 |
|
399 | 0 | c = mozilla::unicode::GetLowercase(c); |
400 | 0 |
|
401 | 0 | *aNext = aStr + 4; |
402 | 0 | return c; |
403 | 0 | } |
404 | 0 | |
405 | 0 | // Hm, we don't understand this sequence. |
406 | 0 | return -1; |
407 | 0 | } |
408 | | |
409 | | uint32_t |
410 | 0 | GetLowerUTF8Codepoint(const char* aStr, const char* aEnd, const char **aNext) { |
411 | 0 | return GetLowerUTF8Codepoint_inline(aStr, aEnd, aNext); |
412 | 0 | } |
413 | | |
414 | | int32_t CaseInsensitiveCompare(const char *aLeft, |
415 | | const char *aRight, |
416 | | uint32_t aLeftBytes, |
417 | | uint32_t aRightBytes) |
418 | 0 | { |
419 | 0 | const char *leftEnd = aLeft + aLeftBytes; |
420 | 0 | const char *rightEnd = aRight + aRightBytes; |
421 | 0 |
|
422 | 0 | while (aLeft < leftEnd && aRight < rightEnd) { |
423 | 0 | uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, leftEnd, &aLeft); |
424 | 0 | if (MOZ_UNLIKELY(leftChar == uint32_t(-1))) |
425 | 0 | return -1; |
426 | 0 | |
427 | 0 | uint32_t rightChar = GetLowerUTF8Codepoint_inline(aRight, rightEnd, &aRight); |
428 | 0 | if (MOZ_UNLIKELY(rightChar == uint32_t(-1))) |
429 | 0 | return -1; |
430 | 0 | |
431 | 0 | // Now leftChar and rightChar are lower-case, so we can compare them. |
432 | 0 | if (leftChar != rightChar) { |
433 | 0 | if (leftChar > rightChar) |
434 | 0 | return 1; |
435 | 0 | return -1; |
436 | 0 | } |
437 | 0 | } |
438 | 0 |
|
439 | 0 | // Make sure that if one string is longer than the other we return the |
440 | 0 | // correct result. |
441 | 0 | if (aLeft < leftEnd) |
442 | 0 | return 1; |
443 | 0 | if (aRight < rightEnd) |
444 | 0 | return -1; |
445 | 0 | |
446 | 0 | return 0; |
447 | 0 | } |
448 | | |
449 | | bool |
450 | | CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight, |
451 | | const char* aLeftEnd, const char* aRightEnd, |
452 | | const char** aLeftNext, const char** aRightNext, |
453 | | bool* aErr) |
454 | 0 | { |
455 | 0 | NS_ASSERTION(aLeftNext, "Out pointer shouldn't be null."); |
456 | 0 | NS_ASSERTION(aRightNext, "Out pointer shouldn't be null."); |
457 | 0 | NS_ASSERTION(aErr, "Out pointer shouldn't be null."); |
458 | 0 | NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd."); |
459 | 0 | NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd."); |
460 | 0 |
|
461 | 0 | uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext); |
462 | 0 | if (MOZ_UNLIKELY(leftChar == uint32_t(-1))) { |
463 | 0 | *aErr = true; |
464 | 0 | return false; |
465 | 0 | } |
466 | 0 | |
467 | 0 | uint32_t rightChar = GetLowerUTF8Codepoint_inline(aRight, aRightEnd, aRightNext); |
468 | 0 | if (MOZ_UNLIKELY(rightChar == uint32_t(-1))) { |
469 | 0 | *aErr = true; |
470 | 0 | return false; |
471 | 0 | } |
472 | 0 | |
473 | 0 | // Can't have an error past this point. |
474 | 0 | *aErr = false; |
475 | 0 |
|
476 | 0 | return leftChar == rightChar; |
477 | 0 | } |
478 | | |
479 | | namespace mozilla { |
480 | | |
481 | | uint32_t |
482 | | HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) |
483 | 11.7k | { |
484 | 11.7k | uint32_t hash = 0; |
485 | 11.7k | const char* s = aUTF8; |
486 | 11.7k | const char* end = aUTF8 + aLength; |
487 | 11.7k | |
488 | 11.7k | *aErr = false; |
489 | 11.7k | |
490 | 117k | while (s < end) |
491 | 105k | { |
492 | 105k | uint32_t ucs4 = UTF8CharEnumerator::NextChar(&s, end, aErr); |
493 | 105k | if (*aErr) { |
494 | 0 | return 0; |
495 | 0 | } |
496 | 105k | |
497 | 105k | if (ucs4 < PLANE1_BASE) { |
498 | 105k | hash = AddToHash(hash, ucs4); |
499 | 105k | } |
500 | 0 | else { |
501 | 0 | hash = AddToHash(hash, H_SURROGATE(ucs4), L_SURROGATE(ucs4)); |
502 | 0 | } |
503 | 105k | } |
504 | 11.7k | |
505 | 11.7k | return hash; |
506 | 11.7k | } |
507 | | |
508 | | bool |
509 | | IsSegmentBreakSkipChar(uint32_t u) |
510 | 0 | { |
511 | 0 | return unicode::IsEastAsianWidthFWH(u) && |
512 | 0 | unicode::GetScriptCode(u) != unicode::Script::HANGUL; |
513 | 0 | } |
514 | | |
515 | | } // namespace mozilla |