/src/icu/source/i18n/sortkey.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 1996-2012, International Business Machines Corporation and |
6 | | * others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | */ |
9 | | //=============================================================================== |
10 | | // |
11 | | // File sortkey.cpp |
12 | | // |
13 | | // |
14 | | // |
15 | | // Created by: Helena Shih |
16 | | // |
17 | | // Modification History: |
18 | | // |
19 | | // Date Name Description |
20 | | // |
21 | | // 6/20/97 helena Java class name change. |
22 | | // 6/23/97 helena Added comments to make code more readable. |
23 | | // 6/26/98 erm Changed to use byte arrays instead of UnicodeString |
24 | | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, |
25 | | // Cleaned up operator= |
26 | | // 07/12/99 helena HPUX 11 CC port. |
27 | | // 03/06/01 synwee Modified compareTo, to handle the result of |
28 | | // 2 string similar in contents, but one is longer |
29 | | // than the other |
30 | | //=============================================================================== |
31 | | |
32 | | #include "unicode/utypes.h" |
33 | | |
34 | | #if !UCONFIG_NO_COLLATION |
35 | | |
36 | | #include "unicode/sortkey.h" |
37 | | #include "cmemory.h" |
38 | | #include "uelement.h" |
39 | | #include "ustr_imp.h" |
40 | | |
41 | | U_NAMESPACE_BEGIN |
42 | | |
43 | | // A hash code of kInvalidHashCode indicates that the hash code needs |
44 | | // to be computed. A hash code of kEmptyHashCode is used for empty keys |
45 | | // and for any key whose computed hash code is kInvalidHashCode. |
46 | | static const int32_t kInvalidHashCode = 0; |
47 | | static const int32_t kEmptyHashCode = 1; |
48 | | // The "bogus hash code" replaces a separate fBogus flag. |
49 | | static const int32_t kBogusHashCode = 2; |
50 | | |
51 | | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
52 | | |
53 | | CollationKey::CollationKey() |
54 | 0 | : UObject(), fFlagAndLength(0), |
55 | 0 | fHashCode(kEmptyHashCode) |
56 | 0 | { |
57 | 0 | } |
58 | | |
59 | | // Create a collation key from a bit array. |
60 | | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) |
61 | 0 | : UObject(), fFlagAndLength(count), |
62 | 0 | fHashCode(kInvalidHashCode) |
63 | 0 | { |
64 | 0 | if (count < 0 || (newValues == NULL && count != 0) || |
65 | 0 | (count > getCapacity() && reallocate(count, 0) == NULL)) { |
66 | 0 | setToBogus(); |
67 | 0 | return; |
68 | 0 | } |
69 | | |
70 | 0 | if (count > 0) { |
71 | 0 | uprv_memcpy(getBytes(), newValues, count); |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | CollationKey::CollationKey(const CollationKey& other) |
76 | 0 | : UObject(other), fFlagAndLength(other.getLength()), |
77 | 0 | fHashCode(other.fHashCode) |
78 | 0 | { |
79 | 0 | if (other.isBogus()) |
80 | 0 | { |
81 | 0 | setToBogus(); |
82 | 0 | return; |
83 | 0 | } |
84 | | |
85 | 0 | int32_t length = fFlagAndLength; |
86 | 0 | if (length > getCapacity() && reallocate(length, 0) == NULL) { |
87 | 0 | setToBogus(); |
88 | 0 | return; |
89 | 0 | } |
90 | | |
91 | 0 | if (length > 0) { |
92 | 0 | uprv_memcpy(getBytes(), other.getBytes(), length); |
93 | 0 | } |
94 | 0 | } |
95 | | |
96 | | CollationKey::~CollationKey() |
97 | 0 | { |
98 | 0 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
99 | 0 | } |
100 | | |
101 | 0 | uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { |
102 | 0 | uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); |
103 | 0 | if(newBytes == NULL) { return NULL; } |
104 | 0 | if(length > 0) { |
105 | 0 | uprv_memcpy(newBytes, getBytes(), length); |
106 | 0 | } |
107 | 0 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
108 | 0 | fUnion.fFields.fBytes = newBytes; |
109 | 0 | fUnion.fFields.fCapacity = newCapacity; |
110 | 0 | fFlagAndLength |= 0x80000000; |
111 | 0 | return newBytes; |
112 | 0 | } |
113 | | |
114 | 0 | void CollationKey::setLength(int32_t newLength) { |
115 | | // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); |
116 | 0 | fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; |
117 | 0 | fHashCode = kInvalidHashCode; |
118 | 0 | } |
119 | | |
120 | | // set the key to an empty state |
121 | | CollationKey& |
122 | | CollationKey::reset() |
123 | 0 | { |
124 | 0 | fFlagAndLength &= 0x80000000; |
125 | 0 | fHashCode = kEmptyHashCode; |
126 | |
|
127 | 0 | return *this; |
128 | 0 | } |
129 | | |
130 | | // set the key to a "bogus" or invalid state |
131 | | CollationKey& |
132 | | CollationKey::setToBogus() |
133 | 0 | { |
134 | 0 | fFlagAndLength &= 0x80000000; |
135 | 0 | fHashCode = kBogusHashCode; |
136 | |
|
137 | 0 | return *this; |
138 | 0 | } |
139 | | |
140 | | bool |
141 | | CollationKey::operator==(const CollationKey& source) const |
142 | 0 | { |
143 | 0 | return getLength() == source.getLength() && |
144 | 0 | (this == &source || |
145 | 0 | uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); |
146 | 0 | } |
147 | | |
148 | | const CollationKey& |
149 | | CollationKey::operator=(const CollationKey& other) |
150 | 0 | { |
151 | 0 | if (this != &other) |
152 | 0 | { |
153 | 0 | if (other.isBogus()) |
154 | 0 | { |
155 | 0 | return setToBogus(); |
156 | 0 | } |
157 | | |
158 | 0 | int32_t length = other.getLength(); |
159 | 0 | if (length > getCapacity() && reallocate(length, 0) == NULL) { |
160 | 0 | return setToBogus(); |
161 | 0 | } |
162 | 0 | if (length > 0) { |
163 | 0 | uprv_memcpy(getBytes(), other.getBytes(), length); |
164 | 0 | } |
165 | 0 | fFlagAndLength = (fFlagAndLength & 0x80000000) | length; |
166 | 0 | fHashCode = other.fHashCode; |
167 | 0 | } |
168 | | |
169 | 0 | return *this; |
170 | 0 | } |
171 | | |
172 | | // Bitwise comparison for the collation keys. |
173 | | Collator::EComparisonResult |
174 | | CollationKey::compareTo(const CollationKey& target) const |
175 | 0 | { |
176 | 0 | UErrorCode errorCode = U_ZERO_ERROR; |
177 | 0 | return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); |
178 | 0 | } |
179 | | |
180 | | // Bitwise comparison for the collation keys. |
181 | | UCollationResult |
182 | | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const |
183 | 0 | { |
184 | 0 | if(U_SUCCESS(status)) { |
185 | 0 | const uint8_t *src = getBytes(); |
186 | 0 | const uint8_t *tgt = target.getBytes(); |
187 | | |
188 | | // are we comparing the same string |
189 | 0 | if (src == tgt) |
190 | 0 | return UCOL_EQUAL; |
191 | | |
192 | 0 | UCollationResult result; |
193 | | |
194 | | // are we comparing different lengths? |
195 | 0 | int32_t minLength = getLength(); |
196 | 0 | int32_t targetLength = target.getLength(); |
197 | 0 | if (minLength < targetLength) { |
198 | 0 | result = UCOL_LESS; |
199 | 0 | } else if (minLength == targetLength) { |
200 | 0 | result = UCOL_EQUAL; |
201 | 0 | } else { |
202 | 0 | minLength = targetLength; |
203 | 0 | result = UCOL_GREATER; |
204 | 0 | } |
205 | |
|
206 | 0 | if (minLength > 0) { |
207 | 0 | int diff = uprv_memcmp(src, tgt, minLength); |
208 | 0 | if (diff > 0) { |
209 | 0 | return UCOL_GREATER; |
210 | 0 | } |
211 | 0 | else |
212 | 0 | if (diff < 0) { |
213 | 0 | return UCOL_LESS; |
214 | 0 | } |
215 | 0 | } |
216 | | |
217 | 0 | return result; |
218 | 0 | } else { |
219 | 0 | return UCOL_EQUAL; |
220 | 0 | } |
221 | 0 | } |
222 | | |
223 | | #ifdef U_USE_COLLATION_KEY_DEPRECATES |
224 | | // Create a copy of the byte array. |
225 | | uint8_t* |
226 | | CollationKey::toByteArray(int32_t& count) const |
227 | | { |
228 | | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); |
229 | | |
230 | | if (result == NULL) |
231 | | { |
232 | | count = 0; |
233 | | } |
234 | | else |
235 | | { |
236 | | count = fCount; |
237 | | if (count > 0) { |
238 | | uprv_memcpy(result, fBytes, fCount); |
239 | | } |
240 | | } |
241 | | |
242 | | return result; |
243 | | } |
244 | | #endif |
245 | | |
246 | | static int32_t |
247 | 0 | computeHashCode(const uint8_t *key, int32_t length) { |
248 | 0 | const char *s = reinterpret_cast<const char *>(key); |
249 | 0 | int32_t hash; |
250 | 0 | if (s == NULL || length == 0) { |
251 | 0 | hash = kEmptyHashCode; |
252 | 0 | } else { |
253 | 0 | hash = ustr_hashCharsN(s, length); |
254 | 0 | if (hash == kInvalidHashCode || hash == kBogusHashCode) { |
255 | 0 | hash = kEmptyHashCode; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | return hash; |
259 | 0 | } |
260 | | |
261 | | int32_t |
262 | | CollationKey::hashCode() const |
263 | 0 | { |
264 | | // (Cribbed from UnicodeString) |
265 | | // We cache the hashCode; when it becomes invalid, due to any change to the |
266 | | // string, we note this by setting it to kInvalidHashCode. [LIU] |
267 | | |
268 | | // Note: This method is semantically const, but physically non-const. |
269 | |
|
270 | 0 | if (fHashCode == kInvalidHashCode) |
271 | 0 | { |
272 | 0 | fHashCode = computeHashCode(getBytes(), getLength()); |
273 | 0 | } |
274 | |
|
275 | 0 | return fHashCode; |
276 | 0 | } |
277 | | |
278 | | U_NAMESPACE_END |
279 | | |
280 | | U_CAPI int32_t U_EXPORT2 |
281 | | ucol_keyHashCode(const uint8_t *key, |
282 | | int32_t length) |
283 | 0 | { |
284 | 0 | return icu::computeHashCode(key, length); |
285 | 0 | } |
286 | | |
287 | | #endif /* #if !UCONFIG_NO_COLLATION */ |