/src/icu/source/i18n/collationrootelements.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2013-2014, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collationrootelements.h  | 
9  |  | *  | 
10  |  | * created on: 2013mar01  | 
11  |  | * created by: Markus W. Scherer  | 
12  |  | */  | 
13  |  |  | 
14  |  | #ifndef __COLLATIONROOTELEMENTS_H__  | 
15  |  | #define __COLLATIONROOTELEMENTS_H__  | 
16  |  |  | 
17  |  | #include "unicode/utypes.h"  | 
18  |  |  | 
19  |  | #if !UCONFIG_NO_COLLATION  | 
20  |  |  | 
21  |  | #include "unicode/uobject.h"  | 
22  |  | #include "collation.h"  | 
23  |  |  | 
24  |  | U_NAMESPACE_BEGIN  | 
25  |  |  | 
26  |  | /**  | 
27  |  |  * Container and access methods for collation elements and weights  | 
28  |  |  * that occur in the root collator.  | 
29  |  |  * Needed for finding boundaries for building a tailoring.  | 
30  |  |  * Wraps a caller-supplied uint32_t table: the constructor stores only the pointer and the length.  | 
31  |  |  * This class takes and returns 16-bit secondary and tertiary weights.  | 
32  |  |  */  | 
33  |  | class U_I18N_API CollationRootElements : public UMemory { | 
34  |  | public:  | 
35  |  |     CollationRootElements(const uint32_t *rootElements, int32_t rootElementsLength)  | 
36  | 0  |             : elements(rootElements), length(rootElementsLength) {} | 
37  |  |  | 
38  |  |     /**  | 
39  |  |      * Higher than any root primary; findP() requires its input to be below this value.  | 
40  |  |      */  | 
41  |  |     static const uint32_t PRIMARY_SENTINEL = 0xffffff00;  | 
42  |  |  | 
43  |  |     /**  | 
44  |  |      * Flag in a root element, set if the element contains secondary & tertiary weights,  | 
45  |  |      * rather than a primary weight.  | 
46  |  |      */  | 
47  |  |     static const uint32_t SEC_TER_DELTA_FLAG = 0x80;  | 
48  |  |     /**  | 
49  |  |      * Mask for getting the primary range step value from a primary-range-end element; a primary element that is not a range end has zero step bits.  | 
50  |  |      */  | 
51  |  |     static const uint8_t PRIMARY_STEP_MASK = 0x7f;  | 
52  |  |  | 
53  |  |     enum { | 
54  |  |         /**  | 
55  |  |          * elements[IX_FIRST_TERTIARY_INDEX] is the index of the first CE with a non-zero tertiary weight.  | 
56  |  |          * Same as the start of the compact root elements table.  | 
57  |  |          */  | 
58  |  |         IX_FIRST_TERTIARY_INDEX,  | 
59  |  |         /**  | 
60  |  |          * elements[IX_FIRST_SECONDARY_INDEX] is the index of the first CE with a non-zero secondary weight.  | 
61  |  |          */  | 
62  |  |         IX_FIRST_SECONDARY_INDEX,  | 
63  |  |         /**  | 
64  |  |          * elements[IX_FIRST_PRIMARY_INDEX] is the index of the first CE with a non-zero primary weight.  | 
65  |  |          */  | 
66  |  |         IX_FIRST_PRIMARY_INDEX,  | 
67  |  |         /**  | 
68  |  |          * The element at this index must match Collation::COMMON_SEC_AND_TER_CE.  | 
69  |  |          */  | 
70  |  |         IX_COMMON_SEC_AND_TER_CE,  | 
71  |  |         /**  | 
72  |  |          * Index of the element with the secondary & tertiary boundaries.  | 
73  |  |          * Bits 31..24: [fixed last secondary common byte 45], see getLastCommonSecondary()  | 
74  |  |          * Bits 23..16: [fixed first ignorable secondary byte 80], see getSecondaryBoundary()  | 
75  |  |          * Bits 15.. 8: reserved, 0  | 
76  |  |          * Bits  7.. 0: [fixed first ignorable tertiary byte 3C], see getTertiaryBoundary()  | 
77  |  |          */  | 
78  |  |         IX_SEC_TER_BOUNDARIES,  | 
79  |  |         /**  | 
80  |  |          * The current number of indexes.  | 
81  |  |          * Currently the same as elements[IX_FIRST_TERTIARY_INDEX].  | 
82  |  |          */  | 
83  |  |         IX_COUNT  | 
84  |  |     };  | 
85  |  |  | 
86  |  |     /**  | 
87  |  |      * Returns the boundary between tertiary weights of primary/secondary CEs  | 
88  |  |      * and those of tertiary CEs, as a 16-bit weight (bits 7..0 of elements[IX_SEC_TER_BOUNDARIES] moved to bits 15..8).  | 
89  |  |      * This is the upper limit for tertiaries of primary/secondary CEs.  | 
90  |  |      * This minus one is the lower limit for tertiaries of tertiary CEs.  | 
91  |  |      */  | 
92  | 0  |     uint32_t getTertiaryBoundary() const { | 
93  | 0  |         return (elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;  | 
94  | 0  |     }  | 
95  |  |  | 
96  |  |     /**  | 
97  |  |      * Returns the first assigned tertiary CE: the element at index elements[IX_FIRST_TERTIARY_INDEX], with SEC_TER_DELTA_FLAG cleared.  | 
98  |  |      */  | 
99  | 0  |     uint32_t getFirstTertiaryCE() const { | 
100  | 0  |         return elements[elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;  | 
101  | 0  |     }  | 
102  |  |  | 
103  |  |     /**  | 
104  |  |      * Returns the last assigned tertiary CE: the element just before index elements[IX_FIRST_SECONDARY_INDEX], with SEC_TER_DELTA_FLAG cleared.  | 
105  |  |      */  | 
106  | 0  |     uint32_t getLastTertiaryCE() const { | 
107  | 0  |         return elements[elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;  | 
108  | 0  |     }  | 
109  |  |  | 
110  |  |     /**  | 
111  |  |      * Returns the last common secondary weight (bits 31..24 of elements[IX_SEC_TER_BOUNDARIES], returned in bits 15..8).  | 
112  |  |      * This is the lower limit for secondaries of primary CEs.  | 
113  |  |      */  | 
114  | 0  |     uint32_t getLastCommonSecondary() const { | 
115  | 0  |         return (elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;  | 
116  | 0  |     }  | 
117  |  |  | 
118  |  |     /**  | 
119  |  |      * Returns the boundary between secondary weights of primary CEs  | 
120  |  |      * and those of secondary CEs (bits 23..16 of elements[IX_SEC_TER_BOUNDARIES], returned in bits 15..8).  | 
121  |  |      * This is the upper limit for secondaries of primary CEs.  | 
122  |  |      * This minus one is the lower limit for secondaries of secondary CEs.  | 
123  |  |      */  | 
124  | 0  |     uint32_t getSecondaryBoundary() const { | 
125  | 0  |         return (elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;  | 
126  | 0  |     }  | 
127  |  |  | 
128  |  |     /**  | 
129  |  |      * Returns the first assigned secondary CE: the element at index elements[IX_FIRST_SECONDARY_INDEX], with SEC_TER_DELTA_FLAG cleared.  | 
130  |  |      */  | 
131  | 0  |     uint32_t getFirstSecondaryCE() const { | 
132  | 0  |         return elements[elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;  | 
133  | 0  |     }  | 
134  |  |  | 
135  |  |     /**  | 
136  |  |      * Returns the last assigned secondary CE: the element just before index elements[IX_FIRST_PRIMARY_INDEX], with SEC_TER_DELTA_FLAG cleared.  | 
137  |  |      */  | 
138  | 0  |     uint32_t getLastSecondaryCE() const { | 
139  | 0  |         return elements[elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;  | 
140  | 0  |     }  | 
141  |  |  | 
142  |  |     /**  | 
143  |  |      * Returns the first assigned primary weight: the element at index elements[IX_FIRST_PRIMARY_INDEX], returned unmasked.  | 
144  |  |      */  | 
145  | 0  |     uint32_t getFirstPrimary() const { | 
146  | 0  |         return elements[elements[IX_FIRST_PRIMARY_INDEX]];  // step=0: cannot be a range end, so no step bits need to be masked off  | 
147  | 0  |     }  | 
148  |  |  | 
149  |  |     /**  | 
150  |  |      * Returns the first assigned primary CE, built by Collation::makeCE() from getFirstPrimary().  | 
151  |  |      */  | 
152  | 0  |     int64_t getFirstPrimaryCE() const { | 
153  | 0  |         return Collation::makeCE(getFirstPrimary());  | 
154  | 0  |     }  | 
155  |  |  | 
156  |  |     /**  | 
157  |  |      * Returns the last root CE with a primary weight before p.  | 
158  |  |      * Intended only for reordering group boundaries.  | 
159  |  |      */  | 
160  |  |     int64_t lastCEWithPrimaryBefore(uint32_t p) const;  | 
161  |  |  | 
162  |  |     /**  | 
163  |  |      * Returns the first root CE with a primary weight of at least p.  | 
164  |  |      * Intended only for reordering group boundaries.  | 
165  |  |      */  | 
166  |  |     int64_t firstCEWithPrimaryAtLeast(uint32_t p) const;  | 
167  |  |  | 
168  |  |     /**  | 
169  |  |      * Returns the primary weight before p.  | 
170  |  |      * p must be greater than the first root primary.  | 
171  |  |      */  | 
172  |  |     uint32_t getPrimaryBefore(uint32_t p, UBool isCompressible) const;  | 
173  |  |  | 
174  |  |     /** Returns the secondary weight before [p, s]. */  | 
175  |  |     uint32_t getSecondaryBefore(uint32_t p, uint32_t s) const;  | 
176  |  |  | 
177  |  |     /** Returns the tertiary weight before [p, s, t]. */  | 
178  |  |     uint32_t getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const;  | 
179  |  |  | 
180  |  |     /**  | 
181  |  |      * Finds the index of the input primary.  | 
182  |  |      * p must occur as a root primary, and must not be 0.  | 
183  |  |      */  | 
184  |  |     int32_t findPrimary(uint32_t p) const;  | 
185  |  |  | 
186  |  |     /**  | 
187  |  |      * Returns the primary weight after p where index=findPrimary(p).  | 
188  |  |      * p must be at least the first root primary.  | 
189  |  |      */  | 
190  |  |     uint32_t getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const;  | 
191  |  |     /**  | 
192  |  |      * Returns the secondary weight after [p, s] where index=findPrimary(p)  | 
193  |  |      * except use index=0 for p=0.  | 
194  |  |      *  | 
195  |  |      * Must return a weight for every root [p, s] as well as for every weight  | 
196  |  |      * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16.  | 
197  |  |      *  | 
198  |  |      * Exception: [0, 0] is handled by the CollationBuilder:  | 
199  |  |      * Both its lower and upper boundaries are special.  | 
200  |  |      */  | 
201  |  |     uint32_t getSecondaryAfter(int32_t index, uint32_t s) const;  | 
202  |  |     /**  | 
203  |  |      * Returns the tertiary weight after [p, s, t] where index=findPrimary(p)  | 
204  |  |      * except use index=0 for p=0.  | 
205  |  |      *  | 
206  |  |      * Must return a weight for every root [p, s, t] as well as for every weight  | 
207  |  |      * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16.  | 
208  |  |      *  | 
209  |  |      * Exception: [0, 0, 0] is handled by the CollationBuilder:  | 
210  |  |      * Both its lower and upper boundaries are special.  | 
211  |  |      */  | 
212  |  |     uint32_t getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const;  | 
213  |  |  | 
214  |  | private:  | 
215  |  |     /**  | 
216  |  |      * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.  | 
217  |  |      */  | 
218  |  |     uint32_t getFirstSecTerForPrimary(int32_t index) const;  | 
219  |  |  | 
220  |  |     /**  | 
221  |  |      * Finds the largest index i where elements[i]<=p.  | 
222  |  |      * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).  | 
223  |  |      * Does not require that p is a root collator primary.  | 
224  |  |      */  | 
225  |  |     int32_t findP(uint32_t p) const;  | 
226  |  |     /** Returns true if q has SEC_TER_DELTA_FLAG clear and non-zero PRIMARY_STEP_MASK bits, that is, if q ends a primary range. */  | 
227  | 0  |     static inline UBool isEndOfPrimaryRange(uint32_t q) { | 
228  | 0  |         return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;  | 
229  | 0  |     }  | 
230  |  |  | 
231  |  |     /**  | 
232  |  |      * Data structure:  | 
233  |  |      *  | 
234  |  |      * The first few entries are indexes, up to elements[IX_FIRST_TERTIARY_INDEX].  | 
235  |  |      * See the comments on the IX_ constants.  | 
236  |  |      *  | 
237  |  |      * All other elements are a compact form of the root collator CEs  | 
238  |  |      * in mostly collation order.  | 
239  |  |      *  | 
240  |  |      * A sequence of one or more root CEs with the same primary weight is stored as  | 
241  |  |      * one element with the primary weight, with the SEC_TER_DELTA_FLAG flag not set,  | 
242  |  |      * followed by elements with only the secondary/tertiary weights,  | 
243  |  |      * each with that flag set.  | 
244  |  |      * If the lowest secondary/tertiary combination is Collation::COMMON_SEC_AND_TER_CE,  | 
245  |  |      * then the element for that combination is omitted.  | 
246  |  |      *  | 
247  |  |      * Note: If the first actual secondary/tertiary combination is higher than  | 
248  |  |      * Collation::COMMON_SEC_AND_TER_CE (which is unusual),  | 
249  |  |      * the runtime code will assume anyway that Collation::COMMON_SEC_AND_TER_CE is present.  | 
250  |  |      *  | 
251  |  |      * A range of only-primary CEs with a consistent "step" increment  | 
252  |  |      * from each primary to the next may be stored as a range.  | 
253  |  |      * Only the first and last primary are stored, and the last has the step  | 
254  |  |      * value in the low bits (PRIMARY_STEP_MASK).  | 
255  |  |      *  | 
256  |  |      * A range-end element may also either start a new range or be followed by  | 
257  |  |      * elements with secondary/tertiary deltas.  | 
258  |  |      *  | 
259  |  |      * A primary element that is not a range end has zero step bits.  | 
260  |  |      *  | 
261  |  |      * There is no element for the completely ignorable CE (all weights 0).  | 
262  |  |      *  | 
263  |  |      * Before elements[IX_FIRST_PRIMARY_INDEX], all elements are secondary/tertiary deltas,  | 
264  |  |      * for all of the ignorable root CEs.  | 
265  |  |      *  | 
266  |  |      * There are no elements for unassigned-implicit primary CEs.  | 
267  |  |      * All primaries stored here are at most 3 bytes long.  | 
268  |  |      */  | 
269  |  |     const uint32_t *elements;  | 
270  |  |     int32_t length;  | 
271  |  | };  | 
272  |  |  | 
273  |  | U_NAMESPACE_END  | 
274  |  |  | 
275  |  | #endif  // !UCONFIG_NO_COLLATION  | 
276  |  | #endif  // __COLLATIONROOTELEMENTS_H__  |