/src/icu/source/i18n/sortkey.cpp

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1996-2012, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
//===============================================================================
//
// File sortkey.cpp
//
//
//
// Created by: Helena Shih
//
// Modification History:
//
//  Date         Name          Description
//
//  6/20/97      helena        Java class name change.
//  6/23/97      helena        Added comments to make code more readable.
//  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
//                             Cleaned up operator=
// 07/12/99      helena        HPUX 11 CC port.
// 03/06/01      synwee        Modified compareTo, to handle the result of
//                             2 string similar in contents, but one is longer
//                             than the other
//===============================================================================

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/sortkey.h"
#include "cmemory.h"
#include "uelement.h"
#include "ustr_imp.h"

U_NAMESPACE_BEGIN

// Sentinel values stored in CollationKey::fHashCode.
//
// A hash code of kInvalidHashCode indicates that the hash code needs
// to be computed; hashCode() computes and caches it on demand.
// A hash code of kEmptyHashCode is used for empty keys, and
// computeHashCode() also substitutes it for any key whose computed hash
// code collides with one of the other two sentinels (kInvalidHashCode or
// kBogusHashCode).
static const int32_t kInvalidHashCode = 0;
static const int32_t kEmptyHashCode = 1;
// The "bogus hash code" replaces a separate fBogus flag.
static const int32_t kBogusHashCode = 2;

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)

// Construct an empty key: zero length, heap-buffer flag clear, and the hash
// code that is reserved for empty keys.
CollationKey::CollationKey()
    : UObject()
    , fFlagAndLength(0)
    , fHashCode(kEmptyHashCode)
{}

// Create a collation key from a byte array.
//
// newValues: the bytes to copy into the key; may be NULL only when count is 0.
// count:     the number of bytes to copy; must not be negative.
//
// The key is set to bogus when the arguments are invalid or when a buffer of
// count bytes cannot be allocated.
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
    : UObject(), fFlagAndLength(0),
      fHashCode(kInvalidHashCode)
{
    // fFlagAndLength deliberately starts at 0 instead of count. Its sign bit
    // is the "owns a heap buffer" flag, so a negative count stored there
    // would survive setToBogus() (which keeps that bit) and make the
    // destructor free the uninitialized fUnion.fFields.fBytes pointer.
    if (count < 0 || (newValues == NULL && count != 0) ||
            (count > getCapacity() && reallocate(count, 0) == NULL)) {
        setToBogus();
        return;
    }

    if (count > 0) {
        uprv_memcpy(getBytes(), newValues, count);
        // Record the length; |= keeps the heap flag reallocate() may have set.
        fFlagAndLength |= count;
    }
}

// Copy constructor: duplicates the bytes and the cached hash code of other.
// The new key becomes bogus when other is bogus, or when a buffer large
// enough for the copy cannot be allocated.
CollationKey::CollationKey(const CollationKey& other)
    : UObject(other), fFlagAndLength(other.getLength()),
      fHashCode(other.fHashCode)
{
    if (other.isBogus()) {
        setToBogus();
        return;
    }

    // Captured before reallocate() can OR the heap flag into fFlagAndLength,
    // so this is the plain byte count.
    const int32_t byteCount = fFlagAndLength;
    if (byteCount > getCapacity()) {
        if (reallocate(byteCount, 0) == NULL) {
            setToBogus();
            return;
        }
    }

    if (byteCount <= 0) {
        return;
    }
    uprv_memcpy(getBytes(), other.getBytes(), byteCount);
}

// Release the heap buffer, if there is one. A negative fFlagAndLength (sign
// bit set) is the marker reallocate() leaves when it installs a buffer
// obtained from uprv_malloc().
CollationKey::~CollationKey()
{
    const bool ownsHeapBuffer = fFlagAndLength < 0;
    if (ownsHeapBuffer) {
        uprv_free(fUnion.fFields.fBytes);
    }
}

uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
    uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
    if(newBytes == NULL) { return NULL; }
    if(length > 0) {
        uprv_memcpy(newBytes, getBytes(), length);
    }
    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
    fUnion.fFields.fBytes = newBytes;
    fUnion.fFields.fCapacity = newCapacity;
    fFlagAndLength |= 0x80000000;
    return newBytes;
}

// Store newLength as the key length, leaving the heap-buffer flag (the sign
// bit of fFlagAndLength) as it is, and mark the cached hash code as stale.
void CollationKey::setLength(int32_t newLength) {
    // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
    fHashCode = kInvalidHashCode;
    fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
}

// Make the key empty: the length bits are cleared while the heap-buffer flag
// (sign bit) is kept, and the hash code becomes the one reserved for empty
// keys. Returns *this.
CollationKey&
CollationKey::reset()
{
    fHashCode = kEmptyHashCode;
    fFlagAndLength = fFlagAndLength & 0x80000000;
    return *this;
}

// Put the key into the "bogus" (invalid) state: the length bits are cleared
// while the heap-buffer flag (sign bit) is kept, and the hash code is set to
// the bogus marker value. Returns *this.
CollationKey&
CollationKey::setToBogus()
{
    fHashCode = kBogusHashCode;
    fFlagAndLength = fFlagAndLength & 0x80000000;
    return *this;
}

// Two keys are equal when they have the same length and the same bytes.
// Comparing a key with itself skips the byte comparison.
bool
CollationKey::operator==(const CollationKey& source) const
{
    const int32_t length = getLength();
    if (length != source.getLength()) {
        return false;
    }
    if (this == &source) {
        return true;
    }
    return uprv_memcmp(getBytes(), source.getBytes(), length) == 0;
}

// Copy assignment: takes over the bytes and the cached hash code of other.
// This key becomes bogus when other is bogus, or when a larger buffer is
// needed and cannot be allocated. Self-assignment is a no-op.
const CollationKey&
CollationKey::operator=(const CollationKey& other)
{
    if (this == &other) {
        return *this;
    }
    if (other.isBogus()) {
        return setToBogus();
    }

    const int32_t byteCount = other.getLength();
    if (byteCount > getCapacity()) {
        // The old contents are not needed, so nothing is carried over.
        if (reallocate(byteCount, 0) == NULL) {
            return setToBogus();
        }
    }
    if (byteCount > 0) {
        uprv_memcpy(getBytes(), other.getBytes(), byteCount);
    }

    // Keep this key's own heap flag; only the length bits come from other.
    fFlagAndLength = (fFlagAndLength & 0x80000000) | byteCount;
    fHashCode = other.fHashCode;
    return *this;
}

// Bitwise comparison for the collation keys.
// Forwards to the UErrorCode overload with a fresh error code and converts
// the result to the Collator::EComparisonResult enum.
Collator::EComparisonResult
CollationKey::compareTo(const CollationKey& target) const
{
    UErrorCode status = U_ZERO_ERROR;
    const UCollationResult order = compareTo(target, status);
    return static_cast<Collator::EComparisonResult>(order);
}

// Bitwise comparison for the collation keys.
//
// target: the key to compare this key with.
// status: if it already holds a failure on entry, nothing is compared and
//         UCOL_EQUAL is returned; this function never modifies it.
//
// The bytes shared by both keys are compared first. If they are identical,
// the shorter key orders before the longer one.
UCollationResult
CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
{
    if (!U_SUCCESS(status)) {
        return UCOL_EQUAL;
    }

    const uint8_t *src = getBytes();
    const uint8_t *tgt = target.getBytes();

    // Same storage means same key.
    if (src == tgt) {
        return UCOL_EQUAL;
    }

    const int32_t sourceLength = getLength();
    const int32_t targetLength = target.getLength();
    const int32_t commonLength =
        (sourceLength < targetLength) ? sourceLength : targetLength;

    if (commonLength > 0) {
        const int diff = uprv_memcmp(src, tgt, commonLength);
        if (diff > 0) {
            return UCOL_GREATER;
        }
        if (diff < 0) {
            return UCOL_LESS;
        }
    }

    // The common prefix is identical: the lengths decide.
    if (sourceLength < targetLength) {
        return UCOL_LESS;
    }
    if (sourceLength > targetLength) {
        return UCOL_GREATER;
    }
    return UCOL_EQUAL;
}

#ifdef U_USE_COLLATION_KEY_DEPRECATES
// Create a copy of the byte array.
//
// count: receives the number of bytes in the returned buffer, or 0 when the
//        allocation fails.
//
// Returns a buffer obtained from uprv_malloc() holding a copy of the key
// bytes, or NULL when the allocation fails.
// NOTE(review): presumably the caller is expected to release the buffer with
// uprv_free() -- confirm against the header.
//
// The key storage is read through getLength()/getBytes(), like the rest of
// this file, rather than through direct member access.
uint8_t*
CollationKey::toByteArray(int32_t& count) const
{
    const int32_t length = getLength();
    uint8_t *result = static_cast<uint8_t *>(uprv_malloc(length));

    if (result == NULL)
    {
        count = 0;
    }
    else
    {
        count = length;
        if (length > 0) {
            uprv_memcpy(result, getBytes(), length);
        }
    }

    return result;
}
#endif

// Compute the hash code for a run of key bytes.
//
// key:    the bytes to hash; may be NULL.
// length: the number of bytes to hash.
//
// Returns kEmptyHashCode for a NULL key or a zero length. Otherwise returns
// the value of ustr_hashCharsN(), except that a result equal to one of the
// reserved values kInvalidHashCode or kBogusHashCode is replaced by
// kEmptyHashCode so that it cannot be mistaken for a marker.
static int32_t
computeHashCode(const uint8_t *key, int32_t  length) {
    if (key == NULL || length == 0) {
        return kEmptyHashCode;
    }

    const int32_t raw =
        ustr_hashCharsN(reinterpret_cast<const char *>(key), length);
    const bool isReserved = (raw == kInvalidHashCode) || (raw == kBogusHashCode);
    return isReserved ? kEmptyHashCode : raw;
}

// Return the hash code of this key, computing it on first use.
//
// (Cribbed from UnicodeString)
// The hash code is cached in fHashCode. Whenever the key changes, the cache
// is marked stale by storing kInvalidHashCode. [LIU]
//
// Note: This method is semantically const, but physically non-const: it
// assigns fHashCode when the cached value is stale.
int32_t
CollationKey::hashCode() const
{
    if (fHashCode != kInvalidHashCode) {
        return fHashCode;
    }

    fHashCode = computeHashCode(getBytes(), getLength());
    return fHashCode;
}

U_NAMESPACE_END

// C entry point: hash code for a raw sort key of length bytes. Delegates to
// icu::computeHashCode(), so it yields the same value CollationKey::hashCode()
// computes for the same bytes.
U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t *key,
                 int32_t length)
{
    const int32_t hash = icu::computeHashCode(key, length);
    return hash;
}

#endif /* #if !UCONFIG_NO_COLLATION */

Coverage Report

Created: 2025-06-24 06:43

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		*******************************************************************************
5		* Copyright (C) 1996-2012, International Business Machines Corporation and
6		* others. All Rights Reserved.
7		*******************************************************************************
8		*/
9		//===============================================================================
10		//
11		// File sortkey.cpp
12		//
13		//
14		//
15		// Created by: Helena Shih
16		//
17		// Modification History:
18		//
19		// Date Name Description
20		//
21		// 6/20/97 helena Java class name change.
22		// 6/23/97 helena Added comments to make code more readable.
23		// 6/26/98 erm Changed to use byte arrays instead of UnicodeString
24		// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
25		// Cleaned up operator=
26		// 07/12/99 helena HPUX 11 CC port.
27		// 03/06/01 synwee Modified compareTo, to handle the result of
28		// 2 string similar in contents, but one is longer
29		// than the other
30		//===============================================================================
31
32		#include "unicode/utypes.h"
33
34		#if !UCONFIG_NO_COLLATION
35
36		#include "unicode/sortkey.h"
37		#include "cmemory.h"
38		#include "uelement.h"
39		#include "ustr_imp.h"
40
41		U_NAMESPACE_BEGIN
42
43		// A hash code of kInvalidHashCode indicates that the hash code needs
44		// to be computed. A hash code of kEmptyHashCode is used for empty keys
45		// and for any key whose computed hash code is kInvalidHashCode.
46		static const int32_t kInvalidHashCode = 0;
47		static const int32_t kEmptyHashCode = 1;
48		// The "bogus hash code" replaces a separate fBogus flag.
49		static const int32_t kBogusHashCode = 2;
50
51		UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
52
53		CollationKey::CollationKey()
54	0	: UObject(), fFlagAndLength(0),
55	0	fHashCode(kEmptyHashCode)
56	0	{
57	0	}
58
59		// Create a collation key from a bit array.
60		CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
61	0	: UObject(), fFlagAndLength(count),
62	0	fHashCode(kInvalidHashCode)
63	0	{
64	0	if (count < 0 || (newValues == NULL && count != 0) ||
65	0	(count > getCapacity() && reallocate(count, 0) == NULL)) {
66	0	setToBogus();
67	0	return;
68	0	}
69
70	0	if (count > 0) {
71	0	uprv_memcpy(getBytes(), newValues, count);
72	0	}
73	0	}
74
75		CollationKey::CollationKey(const CollationKey& other)
76	0	: UObject(other), fFlagAndLength(other.getLength()),
77	0	fHashCode(other.fHashCode)
78	0	{
79	0	if (other.isBogus())
80	0	{
81	0	setToBogus();
82	0	return;
83	0	}
84
85	0	int32_t length = fFlagAndLength;
86	0	if (length > getCapacity() && reallocate(length, 0) == NULL) {
87	0	setToBogus();
88	0	return;
89	0	}
90
91	0	if (length > 0) {
92	0	uprv_memcpy(getBytes(), other.getBytes(), length);
93	0	}
94	0	}
95
96		CollationKey::~CollationKey()
97	0	{
98	0	if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
99	0	}
100
101	0	uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
102	0	uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
103	0	if(newBytes == NULL) { return NULL; }
104	0	if(length > 0) {
105	0	uprv_memcpy(newBytes, getBytes(), length);
106	0	}
107	0	if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
108	0	fUnion.fFields.fBytes = newBytes;
109	0	fUnion.fFields.fCapacity = newCapacity;
110	0	fFlagAndLength |= 0x80000000;
111	0	return newBytes;
112	0	}
113
114	0	void CollationKey::setLength(int32_t newLength) {
115		// U_ASSERT(newLength >= 0 && newLength <= getCapacity());
116	0	fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
117	0	fHashCode = kInvalidHashCode;
118	0	}
119
120		// set the key to an empty state
121		CollationKey&
122		CollationKey::reset()
123	0	{
124	0	fFlagAndLength &= 0x80000000;
125	0	fHashCode = kEmptyHashCode;
126
127	0	return *this;
128	0	}
129
130		// set the key to a "bogus" or invalid state
131		CollationKey&
132		CollationKey::setToBogus()
133	0	{
134	0	fFlagAndLength &= 0x80000000;
135	0	fHashCode = kBogusHashCode;
136
137	0	return *this;
138	0	}
139
140		bool
141		CollationKey::operator==(const CollationKey& source) const
142	0	{
143	0	return getLength() == source.getLength() &&
144	0	(this == &source ||
145	0	uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
146	0	}
147
148		const CollationKey&
149		CollationKey::operator=(const CollationKey& other)
150	0	{
151	0	if (this != &other)
152	0	{
153	0	if (other.isBogus())
154	0	{
155	0	return setToBogus();
156	0	}
157
158	0	int32_t length = other.getLength();
159	0	if (length > getCapacity() && reallocate(length, 0) == NULL) {
160	0	return setToBogus();
161	0	}
162	0	if (length > 0) {
163	0	uprv_memcpy(getBytes(), other.getBytes(), length);
164	0	}
165	0	fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
166	0	fHashCode = other.fHashCode;
167	0	}
168
169	0	return *this;
170	0	}
171
172		// Bitwise comparison for the collation keys.
173		Collator::EComparisonResult
174		CollationKey::compareTo(const CollationKey& target) const
175	0	{
176	0	UErrorCode errorCode = U_ZERO_ERROR;
177	0	return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
178	0	}
179
180		// Bitwise comparison for the collation keys.
181		UCollationResult
182		CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
183	0	{
184	0	if(U_SUCCESS(status)) {
185	0	const uint8_t *src = getBytes();
186	0	const uint8_t *tgt = target.getBytes();
187
188		// are we comparing the same string
189	0	if (src == tgt)
190	0	return UCOL_EQUAL;
191
192	0	UCollationResult result;
193
194		// are we comparing different lengths?
195	0	int32_t minLength = getLength();
196	0	int32_t targetLength = target.getLength();
197	0	if (minLength < targetLength) {
198	0	result = UCOL_LESS;
199	0	} else if (minLength == targetLength) {
200	0	result = UCOL_EQUAL;
201	0	} else {
202	0	minLength = targetLength;
203	0	result = UCOL_GREATER;
204	0	}
205
206	0	if (minLength > 0) {
207	0	int diff = uprv_memcmp(src, tgt, minLength);
208	0	if (diff > 0) {
209	0	return UCOL_GREATER;
210	0	}
211	0	else
212	0	if (diff < 0) {
213	0	return UCOL_LESS;
214	0	}
215	0	}
216
217	0	return result;
218	0	} else {
219	0	return UCOL_EQUAL;
220	0	}
221	0	}
222
223		#ifdef U_USE_COLLATION_KEY_DEPRECATES
224		// Create a copy of the byte array.
225		uint8_t*
226		CollationKey::toByteArray(int32_t& count) const
227		{
228		uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
229
230		if (result == NULL)
231		{
232		count = 0;
233		}
234		else
235		{
236		count = fCount;
237		if (count > 0) {
238		uprv_memcpy(result, fBytes, fCount);
239		}
240		}
241
242		return result;
243		}
244		#endif
245
246		static int32_t
247	0	computeHashCode(const uint8_t *key, int32_t length) {
248	0	const char *s = reinterpret_cast<const char *>(key);
249	0	int32_t hash;
250	0	if (s == NULL || length == 0) {
251	0	hash = kEmptyHashCode;
252	0	} else {
253	0	hash = ustr_hashCharsN(s, length);
254	0	if (hash == kInvalidHashCode || hash == kBogusHashCode) {
255	0	hash = kEmptyHashCode;
256	0	}
257	0	}
258	0	return hash;
259	0	}
260
261		int32_t
262		CollationKey::hashCode() const
263	0	{
264		// (Cribbed from UnicodeString)
265		// We cache the hashCode; when it becomes invalid, due to any change to the
266		// string, we note this by setting it to kInvalidHashCode. [LIU]
267
268		// Note: This method is semantically const, but physically non-const.
269
270	0	if (fHashCode == kInvalidHashCode)
271	0	{
272	0	fHashCode = computeHashCode(getBytes(), getLength());
273	0	}
274
275	0	return fHashCode;
276	0	}
277
278		U_NAMESPACE_END
279
280		U_CAPI int32_t U_EXPORT2
281		ucol_keyHashCode(const uint8_t *key,
282		int32_t length)
283	0	{
284	0	return icu::computeHashCode(key, length);
285	0	}
286
287		#endif /* #if !UCONFIG_NO_COLLATION */