/src/icu/source/i18n/collationkeys.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2012-2014, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collationkeys.h  | 
9  |  | *  | 
10  |  | * created on: 2012sep02  | 
11  |  | * created by: Markus W. Scherer  | 
12  |  | */  | 
13  |  |  | 
14  |  | #ifndef __COLLATIONKEYS_H__  | 
15  |  | #define __COLLATIONKEYS_H__  | 
16  |  |  | 
17  |  | #include "unicode/utypes.h"  | 
18  |  |  | 
19  |  | #if !UCONFIG_NO_COLLATION  | 
20  |  |  | 
21  |  | #include "unicode/bytestream.h"  | 
22  |  | #include "unicode/ucol.h"  | 
23  |  | #include "charstr.h"  | 
24  |  | #include "collation.h"  | 
25  |  |  | 
26  |  | U_NAMESPACE_BEGIN  | 
27  |  |  | 
28  |  | class CollationIterator;  | 
29  |  | struct CollationDataReader;  | 
30  |  | struct CollationSettings;  | 
31  |  |  | 
32  |  | class SortKeyByteSink : public ByteSink { | 
33  |  | public:  | 
34  |  |     SortKeyByteSink(char *dest, int32_t destCapacity)  | 
35  | 0  |             : buffer_(dest), capacity_(destCapacity),  | 
36  | 0  |               appended_(0), ignore_(0) {} | 
37  |  |     virtual ~SortKeyByteSink();  | 
38  |  |  | 
39  | 0  |     void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } | 
40  |  |  | 
41  |  |     virtual void Append(const char *bytes, int32_t n);  | 
42  | 0  |     void Append(uint32_t b) { | 
43  | 0  |         if (ignore_ > 0) { | 
44  | 0  |             --ignore_;  | 
45  | 0  |         } else { | 
46  | 0  |             if (appended_ < capacity_ || Resize(1, appended_)) { | 
47  | 0  |                 buffer_[appended_] = (char)b;  | 
48  | 0  |             }  | 
49  | 0  |             ++appended_;  | 
50  | 0  |         }  | 
51  | 0  |     }  | 
52  |  |     virtual char *GetAppendBuffer(int32_t min_capacity,  | 
53  |  |                                   int32_t desired_capacity_hint,  | 
54  |  |                                   char *scratch, int32_t scratch_capacity,  | 
55  |  |                                   int32_t *result_capacity);  | 
56  | 0  |     int32_t NumberOfBytesAppended() const { return appended_; } | 
57  |  |  | 
58  |  |     /**  | 
59  |  |      * @return how many bytes can be appended (including ignored ones)  | 
60  |  |      *         without reallocation  | 
61  |  |      */  | 
62  | 0  |     int32_t GetRemainingCapacity() const { | 
63  |  |         // Either ignore_ or appended_ should be 0.  | 
64  | 0  |         return ignore_ + capacity_ - appended_;  | 
65  | 0  |     }  | 
66  |  |  | 
67  | 0  |     UBool Overflowed() const { return appended_ > capacity_; } | 
68  |  |     /** @return false if memory allocation failed */  | 
69  | 0  |     UBool IsOk() const { return buffer_ != NULL; } | 
70  |  |  | 
71  |  | protected:  | 
72  |  |     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;  | 
73  |  |     virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;  | 
74  |  |  | 
75  | 0  |     void SetNotOk() { | 
76  | 0  |         buffer_ = NULL;  | 
77  | 0  |         capacity_ = 0;  | 
78  | 0  |     }  | 
79  |  |  | 
80  |  |     char *buffer_;  | 
81  |  |     int32_t capacity_;  | 
82  |  |     int32_t appended_;  | 
83  |  |     int32_t ignore_;  | 
84  |  |  | 
85  |  | private:  | 
86  |  |     SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented  | 
87  |  |     SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented  | 
88  |  | };  | 
89  |  |  | 
90  |  | class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { | 
91  |  | public:  | 
92  |  |     class LevelCallback : public UMemory { | 
93  |  |     public:  | 
94  |  |         virtual ~LevelCallback();  | 
95  |  |         /**  | 
96  |  |          * @param level The next level about to be written to the ByteSink.  | 
97  |  |          * @return true if the level is to be written  | 
98  |  |          *         (the base class implementation always returns true)  | 
99  |  |          */  | 
100  |  |         virtual UBool needToWrite(Collation::Level level);  | 
101  |  |     };  | 
102  |  |  | 
103  |  |     /**  | 
104  |  |      * Writes the sort key bytes for minLevel up to the iterator data's strength.  | 
105  |  |      * Optionally writes the case level.  | 
106  |  |      * Stops writing levels when callback.needToWrite(level) returns false.  | 
107  |  |      * Separates levels with the LEVEL_SEPARATOR_BYTE  | 
108  |  |      * but does not write a TERMINATOR_BYTE.  | 
109  |  |      */  | 
110  |  |     static void writeSortKeyUpToQuaternary(CollationIterator &iter,  | 
111  |  |                                            const UBool *compressibleBytes,  | 
112  |  |                                            const CollationSettings &settings,  | 
113  |  |                                            SortKeyByteSink &sink,  | 
114  |  |                                            Collation::Level minLevel, LevelCallback &callback,  | 
115  |  |                                            UBool preflight, UErrorCode &errorCode);  | 
116  |  | private:  | 
117  |  |     friend struct CollationDataReader;  | 
118  |  |  | 
119  |  |     CollationKeys();  // no instantiation  | 
120  |  |  | 
121  |  |     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.  | 
122  |  |     static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;  | 
123  |  |     static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;  | 
124  |  |     static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;  | 
125  |  |     static const int32_t SEC_COMMON_MAX_COUNT = 0x21;  | 
126  |  |  | 
127  |  |     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.  | 
128  |  |     static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;  | 
129  |  |     static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;  | 
130  |  |     static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;  | 
131  |  |     static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;  | 
132  |  |  | 
133  |  |     // Case level, upperFirst: Compress up to 13 common weights as 3..15.  | 
134  |  |     static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;  | 
135  |  |     static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;  | 
136  |  |     static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;  | 
137  |  |  | 
138  |  |     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.  | 
139  |  |     static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;  | 
140  |  |     static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;  | 
141  |  |     static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;  | 
142  |  |     static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;  | 
143  |  |  | 
144  |  |     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.  | 
145  |  |     static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;  | 
146  |  |     static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;  | 
147  |  |     static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;  | 
148  |  |     static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;  | 
149  |  |  | 
150  |  |     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.  | 
151  |  |     static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;  | 
152  |  |     static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;  | 
153  |  |     static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;  | 
154  |  |     static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;  | 
155  |  |  | 
156  |  |     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.  | 
157  |  |     static const uint32_t QUAT_COMMON_LOW = 0x1c;  | 
158  |  |     static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;  | 
159  |  |     static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;  | 
160  |  |     static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;  | 
161  |  |     // Primary weights shifted to quaternary level must be encoded with  | 
162  |  |     // a lead byte below the common-weight compression range.  | 
163  |  |     static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b  | 
164  |  | };  | 
165  |  |  | 
166  |  | U_NAMESPACE_END  | 
167  |  |  | 
168  |  | #endif  // !UCONFIG_NO_COLLATION  | 
169  |  | #endif  // __COLLATIONKEYS_H__  |