/src/icu/source/i18n/collationkeys.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2012-2014, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * collationkeys.h |
9 | | * |
10 | | * created on: 2012sep02 |
11 | | * created by: Markus W. Scherer |
12 | | */ |
13 | | |
14 | | #ifndef __COLLATIONKEYS_H__ |
15 | | #define __COLLATIONKEYS_H__ |
16 | | |
17 | | #include "unicode/utypes.h" |
18 | | |
19 | | #if !UCONFIG_NO_COLLATION |
20 | | |
21 | | #include "unicode/bytestream.h" |
22 | | #include "unicode/ucol.h" |
23 | | #include "charstr.h" |
24 | | #include "collation.h" |
25 | | |
26 | | U_NAMESPACE_BEGIN |
27 | | |
28 | | class CollationIterator; |
29 | | struct CollationDataReader; |
30 | | struct CollationSettings; |
31 | | |
32 | | class SortKeyByteSink : public ByteSink { /* Abstract sink for sort key bytes: stores into buffer_ while room remains and keeps counting in appended_ afterwards; subclasses supply Resize() and AppendBeyondCapacity(). */ |
33 | | public: |
34 | | SortKeyByteSink(char *dest, int32_t destCapacity) /* starts with the caller's buffer and capacity; nothing appended, nothing ignored */ |
35 | 0 | : buffer_(dest), capacity_(destCapacity), |
36 | 0 | appended_(0), ignore_(0) {} |
37 | | virtual ~SortKeyByteSink(); |
38 | | /** Sets how many upcoming bytes Append(uint32_t) drops instead of storing. */ |
39 | 0 | void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } |
40 | | /** Multi-byte append; only declared here, the definition is not visible in this header. */ |
41 | | virtual void Append(const char *bytes, int32_t n); |
42 | 0 | void Append(uint32_t b) { /* single-byte append */ |
43 | 0 | if (ignore_ > 0) { /* still skipping: consume one ignored byte and store nothing */ |
44 | 0 | --ignore_; |
45 | 0 | } else { |
46 | 0 | if (appended_ < capacity_ || Resize(1, appended_)) { /* store only if there is room or Resize() reports success */ |
47 | 0 | buffer_[appended_] = (char)b; |
48 | 0 | } |
49 | 0 | ++appended_; /* counted even when the byte was not stored; Overflowed() compares this count with capacity_ */ |
50 | 0 | } |
51 | 0 | } |
52 | | virtual char *GetAppendBuffer(int32_t min_capacity, |
53 | | int32_t desired_capacity_hint, |
54 | | char *scratch, int32_t scratch_capacity, |
55 | | int32_t *result_capacity); |
56 | 0 | int32_t NumberOfBytesAppended() const { return appended_; } /* returns the running count, which Append(uint32_t) also advances for bytes it could not store */ |
57 | | |
58 | | /** |
59 | | * @return how many bytes can be appended (including ignored ones) |
60 | | * without reallocation |
61 | | */ |
62 | 0 | int32_t GetRemainingCapacity() const { |
63 | | // Either ignore_ or appended_ should be 0. |
64 | 0 | return ignore_ + capacity_ - appended_; |
65 | 0 | } |
66 | | |
67 | 0 | UBool Overflowed() const { return appended_ > capacity_; } /* true when more bytes were counted than capacity_ allows */ |
68 | | /** @return false if memory allocation failed (buffer_ is NULL, as left by SetNotOk()) */ |
69 | 0 | UBool IsOk() const { return buffer_ != NULL; } |
70 | | |
71 | | protected: |
72 | | virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; /* NOTE(review): presumably used by Append(bytes, n) when the bytes do not fit - confirm in the .cpp */ |
73 | | virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; /* a true result lets Append(uint32_t) store at buffer_[appended_] */ |
74 | | /** Marks the sink as failed: clears buffer_ and capacity_, after which IsOk() returns false. */ |
75 | 0 | void SetNotOk() { |
76 | 0 | buffer_ = NULL; |
77 | 0 | capacity_ = 0; |
78 | 0 | } |
79 | | |
80 | | char *buffer_; /* destination storage; NULL after SetNotOk() */ |
81 | | int32_t capacity_; /* limit checked against appended_ before each single-byte store */ |
82 | | int32_t appended_; /* bytes counted so far, stored or not */ |
83 | | int32_t ignore_; /* bytes still to be dropped by Append(uint32_t) */ |
84 | | |
85 | | private: |
86 | | SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented |
87 | | SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented |
88 | | }; |
89 | | |
90 | | class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { |
91 | | public: |
92 | | class LevelCallback : public UMemory { /* hook consulted by writeSortKeyUpToQuaternary() before each level is written */ |
93 | | public: |
94 | | virtual ~LevelCallback(); |
95 | | /** |
96 | | * @param level The next level about to be written to the ByteSink. |
97 | | * @return true if the level is to be written |
98 | | * (the base class implementation always returns true) |
99 | | */ |
100 | | virtual UBool needToWrite(Collation::Level level); |
101 | | }; |
102 | | |
103 | | /** |
104 | | * Writes the sort key bytes for minLevel up to the iterator data's strength. |
105 | | * Optionally writes the case level. |
106 | | * Stops writing levels when callback.needToWrite(level) returns false. |
107 | | * Separates levels with the LEVEL_SEPARATOR_BYTE |
108 | | * but does not write a TERMINATOR_BYTE. |
109 | | */ |
110 | | static void writeSortKeyUpToQuaternary(CollationIterator &iter, |
111 | | const UBool *compressibleBytes, |
112 | | const CollationSettings &settings, |
113 | | SortKeyByteSink &sink, |
114 | | Collation::Level minLevel, LevelCallback &callback, |
115 | | UBool preflight, UErrorCode &errorCode); |
116 | | private: |
117 | | friend struct CollationDataReader; /* NOTE(review): presumably so CollationDataReader can read the private constants below - confirm */ |
118 | | |
119 | | CollationKeys(); // no instantiation |
120 | | |
121 | | // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. |
122 | | static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; |
123 | | static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; |
124 | | static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; |
125 | | static const int32_t SEC_COMMON_MAX_COUNT = 0x21; /* 33 */ |
126 | | |
127 | | // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. |
128 | | static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; |
129 | | static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; |
130 | | static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; |
131 | | static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; |
132 | | |
133 | | // Case level, upperFirst: Compress up to 13 common weights as 3..15. |
134 | | static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; |
135 | | static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; |
136 | | static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; |
137 | | |
138 | | // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. |
139 | | static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; |
140 | | static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; |
141 | | static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; |
142 | | static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; /* 97 */ |
143 | | |
144 | | // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. |
145 | | static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; |
146 | | static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; |
147 | | static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; |
148 | | static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; /* 33 */ |
149 | | |
150 | | // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. |
151 | | static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; |
152 | | static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; |
153 | | static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; |
154 | | static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; /* 33 */ |
155 | | |
156 | | // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. |
157 | | static const uint32_t QUAT_COMMON_LOW = 0x1c; |
158 | | static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; |
159 | | static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; |
160 | | static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; /* 113 */ |
161 | | // Primary weights shifted to quaternary level must be encoded with |
162 | | // a lead byte below the common-weight compression range. |
163 | | static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b |
164 | | }; |
165 | | |
166 | | U_NAMESPACE_END |
167 | | |
168 | | #endif // !UCONFIG_NO_COLLATION |
169 | | #endif // __COLLATIONKEYS_H__ |