/src/icu/source/i18n/collationfastlatinbuilder.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2013-2016, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * collationfastlatinbuilder.h |
9 | | * |
10 | | * created on: 2013aug09 |
11 | | * created by: Markus W. Scherer |
12 | | */ |
13 | | |
14 | | #ifndef __COLLATIONFASTLATINBUILDER_H__ |
15 | | #define __COLLATIONFASTLATINBUILDER_H__ |
16 | | |
17 | | #include "unicode/utypes.h" |
18 | | |
19 | | #if !UCONFIG_NO_COLLATION |
20 | | |
21 | | #include "unicode/ucol.h" |
22 | | #include "unicode/unistr.h" |
23 | | #include "unicode/uobject.h" |
24 | | #include "collation.h" |
25 | | #include "collationfastlatin.h" |
26 | | #include "uvectr64.h" |
27 | | |
28 | | U_NAMESPACE_BEGIN |
29 | | |
30 | | struct CollationData; |
31 | | |
32 | | class U_I18N_API CollationFastLatinBuilder : public UObject { |
33 | | public: |
34 | | CollationFastLatinBuilder(UErrorCode &errorCode); |
35 | | ~CollationFastLatinBuilder(); |
36 | | |
37 | | UBool forData(const CollationData &data, UErrorCode &errorCode); |
38 | | |
39 | 0 | const uint16_t *getTable() const { |
40 | 0 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); |
41 | 0 | } |
42 | 0 | int32_t lengthOfTable() const { return result.length(); } |
43 | | |
44 | | private: |
45 | | // space, punct, symbol, currency (not digit) |
46 | | enum { NUM_SPECIAL_GROUPS = UCOL_REORDER_CODE_CURRENCY - UCOL_REORDER_CODE_FIRST + 1 }; |
47 | | |
48 | | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); |
49 | | UBool inSameGroup(uint32_t p, uint32_t q) const; |
50 | | |
51 | | void resetCEs(); |
52 | | void getCEs(const CollationData &data, UErrorCode &errorCode); |
53 | | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, |
54 | | UErrorCode &errorCode); |
55 | | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, |
56 | | UErrorCode &errorCode); |
57 | | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); |
58 | | void addUniqueCE(int64_t ce, UErrorCode &errorCode); |
59 | | uint32_t getMiniCE(int64_t ce) const; |
60 | | UBool encodeUniqueCEs(UErrorCode &errorCode); |
61 | | UBool encodeCharCEs(UErrorCode &errorCode); |
62 | | UBool encodeContractions(UErrorCode &errorCode); |
63 | | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; |
64 | | |
65 | 0 | static UBool isContractionCharCE(int64_t ce) { |
66 | 0 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; |
67 | 0 | } |
68 | | |
69 | | static const uint32_t CONTRACTION_FLAG = 0x80000000; |
70 | | |
71 | | // temporary "buffer" |
72 | | int64_t ce0, ce1; |
73 | | |
74 | | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; |
75 | | |
76 | | UVector64 contractionCEs; |
77 | | UVector64 uniqueCEs; |
78 | | |
79 | | /** One 16-bit mini CE per unique CE. */ |
80 | | uint16_t *miniCEs; |
81 | | |
82 | | // These are constant for a given root collator. |
83 | | uint32_t lastSpecialPrimaries[NUM_SPECIAL_GROUPS]; |
84 | | uint32_t firstDigitPrimary; |
85 | | uint32_t firstLatinPrimary; |
86 | | uint32_t lastLatinPrimary; |
87 | | // This determines the first normal primary weight which is mapped to |
88 | | // a short mini primary. It must be >=firstDigitPrimary. |
89 | | uint32_t firstShortPrimary; |
90 | | |
91 | | UBool shortPrimaryOverflow; |
92 | | |
93 | | UnicodeString result; |
94 | | int32_t headerLength; |
95 | | }; |
96 | | |
97 | | U_NAMESPACE_END |
98 | | |
99 | | #endif // !UCONFIG_NO_COLLATION |
100 | | #endif // __COLLATIONFASTLATINBUILDER_H__ |