Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/sortkey.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1996-2012, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
*******************************************************************************
8
*/
9
//===============================================================================
10
//
11
// File sortkey.cpp
12
//
13
//
14
//
15
// Created by: Helena Shih
16
//
17
// Modification History:
18
//
19
//  Date         Name          Description
20
//
21
//  6/20/97      helena        Java class name change.
22
//  6/23/97      helena        Added comments to make code more readable.
23
//  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
24
//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
25
//                             Cleaned up operator=
26
// 07/12/99      helena        HPUX 11 CC port.
27
// 03/06/01      synwee        Modified compareTo, to handle the result of
28
//                             2 string similar in contents, but one is longer
29
//                             than the other
30
//===============================================================================
31
32
#include "unicode/utypes.h"
33
34
#if !UCONFIG_NO_COLLATION
35
36
#include "unicode/sortkey.h"
37
#include "cmemory.h"
38
#include "uelement.h"
39
#include "ustr_imp.h"
40
41
U_NAMESPACE_BEGIN
42
43
// A hash code of kInvalidHashCode indicates that the hash code needs
44
// to be computed. A hash code of kEmptyHashCode is used for empty keys
45
// and for any key whose computed hash code is kInvalidHashCode.
46
static const int32_t kInvalidHashCode = 0;
47
static const int32_t kEmptyHashCode = 1;
48
// The "bogus hash code" replaces a separate fBogus flag.
49
static const int32_t kBogusHashCode = 2;
50
51
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
52
53
CollationKey::CollationKey()
54
0
    : UObject(), fFlagAndLength(0),
55
0
      fHashCode(kEmptyHashCode)
56
0
{
57
0
}
58
59
// Create a collation key from a bit array.
60
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
61
0
    : UObject(), fFlagAndLength(count),
62
0
      fHashCode(kInvalidHashCode)
63
0
{
64
0
    if (count < 0 || (newValues == NULL && count != 0) ||
65
0
            (count > getCapacity() && reallocate(count, 0) == NULL)) {
66
0
        setToBogus();
67
0
        return;
68
0
    }
69
70
0
    if (count > 0) {
71
0
        uprv_memcpy(getBytes(), newValues, count);
72
0
    }
73
0
}
74
75
CollationKey::CollationKey(const CollationKey& other)
76
0
    : UObject(other), fFlagAndLength(other.getLength()),
77
0
      fHashCode(other.fHashCode)
78
0
{
79
0
    if (other.isBogus())
80
0
    {
81
0
        setToBogus();
82
0
        return;
83
0
    }
84
85
0
    int32_t length = fFlagAndLength;
86
0
    if (length > getCapacity() && reallocate(length, 0) == NULL) {
87
0
        setToBogus();
88
0
        return;
89
0
    }
90
91
0
    if (length > 0) {
92
0
        uprv_memcpy(getBytes(), other.getBytes(), length);
93
0
    }
94
0
}
95
96
CollationKey::~CollationKey()
97
0
{
98
0
    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
99
0
}
100
101
0
uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
102
0
    uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
103
0
    if(newBytes == NULL) { return NULL; }
104
0
    if(length > 0) {
105
0
        uprv_memcpy(newBytes, getBytes(), length);
106
0
    }
107
0
    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
108
0
    fUnion.fFields.fBytes = newBytes;
109
0
    fUnion.fFields.fCapacity = newCapacity;
110
0
    fFlagAndLength |= 0x80000000;
111
0
    return newBytes;
112
0
}
113
114
0
void CollationKey::setLength(int32_t newLength) {
115
    // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
116
0
    fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
117
0
    fHashCode = kInvalidHashCode;
118
0
}
119
120
// set the key to an empty state
121
CollationKey&
122
CollationKey::reset()
123
0
{
124
0
    fFlagAndLength &= 0x80000000;
125
0
    fHashCode = kEmptyHashCode;
126
127
0
    return *this;
128
0
}
129
130
// set the key to a "bogus" or invalid state
131
CollationKey&
132
CollationKey::setToBogus()
133
0
{
134
0
    fFlagAndLength &= 0x80000000;
135
0
    fHashCode = kBogusHashCode;
136
137
0
    return *this;
138
0
}
139
140
bool
141
CollationKey::operator==(const CollationKey& source) const
142
0
{
143
0
    return getLength() == source.getLength() &&
144
0
            (this == &source ||
145
0
             uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
146
0
}
147
148
const CollationKey&
149
CollationKey::operator=(const CollationKey& other)
150
0
{
151
0
    if (this != &other)
152
0
    {
153
0
        if (other.isBogus())
154
0
        {
155
0
            return setToBogus();
156
0
        }
157
158
0
        int32_t length = other.getLength();
159
0
        if (length > getCapacity() && reallocate(length, 0) == NULL) {
160
0
            return setToBogus();
161
0
        }
162
0
        if (length > 0) {
163
0
            uprv_memcpy(getBytes(), other.getBytes(), length);
164
0
        }
165
0
        fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
166
0
        fHashCode = other.fHashCode;
167
0
    }
168
169
0
    return *this;
170
0
}
171
172
// Bitwise comparison for the collation keys.
173
Collator::EComparisonResult
174
CollationKey::compareTo(const CollationKey& target) const
175
0
{
176
0
    UErrorCode errorCode = U_ZERO_ERROR;
177
0
    return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
178
0
}
179
180
// Bitwise comparison for the collation keys.
181
UCollationResult
182
CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
183
0
{
184
0
  if(U_SUCCESS(status)) {
185
0
    const uint8_t *src = getBytes();
186
0
    const uint8_t *tgt = target.getBytes();
187
188
    // are we comparing the same string
189
0
    if (src == tgt)
190
0
        return  UCOL_EQUAL;
191
192
0
    UCollationResult result;
193
194
    // are we comparing different lengths?
195
0
    int32_t minLength = getLength();
196
0
    int32_t targetLength = target.getLength();
197
0
    if (minLength < targetLength) {
198
0
        result = UCOL_LESS;
199
0
    } else if (minLength == targetLength) {
200
0
        result = UCOL_EQUAL;
201
0
    } else {
202
0
        minLength = targetLength;
203
0
        result = UCOL_GREATER;
204
0
    }
205
206
0
    if (minLength > 0) {
207
0
        int diff = uprv_memcmp(src, tgt, minLength);
208
0
        if (diff > 0) {
209
0
            return UCOL_GREATER;
210
0
        }
211
0
        else
212
0
            if (diff < 0) {
213
0
                return UCOL_LESS;
214
0
            }
215
0
    }
216
217
0
    return result;
218
0
  } else {
219
0
    return UCOL_EQUAL;
220
0
  }
221
0
}
222
223
#ifdef U_USE_COLLATION_KEY_DEPRECATES
224
// Create a copy of the byte array.
225
uint8_t*
226
CollationKey::toByteArray(int32_t& count) const
227
{
228
    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
229
230
    if (result == NULL)
231
    {
232
        count = 0;
233
    }
234
    else
235
    {
236
        count = fCount;
237
        if (count > 0) {
238
            uprv_memcpy(result, fBytes, fCount);
239
        }
240
    }
241
242
    return result;
243
}
244
#endif
245
246
static int32_t
247
0
computeHashCode(const uint8_t *key, int32_t  length) {
248
0
    const char *s = reinterpret_cast<const char *>(key);
249
0
    int32_t hash;
250
0
    if (s == NULL || length == 0) {
251
0
        hash = kEmptyHashCode;
252
0
    } else {
253
0
        hash = ustr_hashCharsN(s, length);
254
0
        if (hash == kInvalidHashCode || hash == kBogusHashCode) {
255
0
            hash = kEmptyHashCode;
256
0
        }
257
0
    }
258
0
    return hash;
259
0
}
260
261
int32_t
262
CollationKey::hashCode() const
263
0
{
264
    // (Cribbed from UnicodeString)
265
    // We cache the hashCode; when it becomes invalid, due to any change to the
266
    // string, we note this by setting it to kInvalidHashCode. [LIU]
267
268
    // Note: This method is semantically const, but physically non-const.
269
270
0
    if (fHashCode == kInvalidHashCode)
271
0
    {
272
0
        fHashCode = computeHashCode(getBytes(), getLength());
273
0
    }
274
275
0
    return fHashCode;
276
0
}
277
278
U_NAMESPACE_END
279
280
U_CAPI int32_t U_EXPORT2
281
ucol_keyHashCode(const uint8_t *key, 
282
                       int32_t  length)
283
0
{
284
0
    return icu::computeHashCode(key, length);
285
0
}
286
287
#endif /* #if !UCONFIG_NO_COLLATION */