/src/icu/source/i18n/collationkeys.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2012-2015, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collationkeys.cpp  | 
9  |  | *  | 
10  |  | * created on: 2012sep02  | 
11  |  | * created by: Markus W. Scherer  | 
12  |  | */  | 
13  |  |  | 
14  |  | #include "unicode/utypes.h"  | 
15  |  |  | 
16  |  | #if !UCONFIG_NO_COLLATION  | 
17  |  |  | 
18  |  | #include "unicode/bytestream.h"  | 
19  |  | #include "collation.h"  | 
20  |  | #include "collationiterator.h"  | 
21  |  | #include "collationkeys.h"  | 
22  |  | #include "collationsettings.h"  | 
23  |  | #include "uassert.h"  | 
24  |  |  | 
25  |  | U_NAMESPACE_BEGIN  | 
26  |  |  | 
27  | 0  | SortKeyByteSink::~SortKeyByteSink() {} | 
28  |  |  | 
29  |  | void  | 
30  | 0  | SortKeyByteSink::Append(const char *bytes, int32_t n) { | 
31  | 0  |     if (n <= 0 || bytes == NULL) { | 
32  | 0  |         return;  | 
33  | 0  |     }  | 
34  | 0  |     if (ignore_ > 0) { | 
35  | 0  |         int32_t ignoreRest = ignore_ - n;  | 
36  | 0  |         if (ignoreRest >= 0) { | 
37  | 0  |             ignore_ = ignoreRest;  | 
38  | 0  |             return;  | 
39  | 0  |         } else { | 
40  | 0  |             bytes += ignore_;  | 
41  | 0  |             n = -ignoreRest;  | 
42  | 0  |             ignore_ = 0;  | 
43  | 0  |         }  | 
44  | 0  |     }  | 
45  | 0  |     int32_t length = appended_;  | 
46  | 0  |     appended_ += n;  | 
47  | 0  |     if ((buffer_ + length) == bytes) { | 
48  | 0  |         return;  // the caller used GetAppendBuffer() and wrote the bytes already  | 
49  | 0  |     }  | 
50  | 0  |     int32_t available = capacity_ - length;  | 
51  | 0  |     if (n <= available) { | 
52  | 0  |         uprv_memcpy(buffer_ + length, bytes, n);  | 
53  | 0  |     } else { | 
54  | 0  |         AppendBeyondCapacity(bytes, n, length);  | 
55  | 0  |     }  | 
56  | 0  | }  | 
57  |  |  | 
58  |  | char *  | 
59  |  | SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,  | 
60  |  |                                  int32_t desired_capacity_hint,  | 
61  |  |                                  char *scratch,  | 
62  |  |                                  int32_t scratch_capacity,  | 
63  | 0  |                                  int32_t *result_capacity) { | 
64  | 0  |     if (min_capacity < 1 || scratch_capacity < min_capacity) { | 
65  | 0  |         *result_capacity = 0;  | 
66  | 0  |         return NULL;  | 
67  | 0  |     }  | 
68  | 0  |     if (ignore_ > 0) { | 
69  |  |         // Do not write ignored bytes right at the end of the buffer.  | 
70  | 0  |         *result_capacity = scratch_capacity;  | 
71  | 0  |         return scratch;  | 
72  | 0  |     }  | 
73  | 0  |     int32_t available = capacity_ - appended_;  | 
74  | 0  |     if (available >= min_capacity) { | 
75  | 0  |         *result_capacity = available;  | 
76  | 0  |         return buffer_ + appended_;  | 
77  | 0  |     } else if (Resize(desired_capacity_hint, appended_)) { | 
78  | 0  |         *result_capacity = capacity_ - appended_;  | 
79  | 0  |         return buffer_ + appended_;  | 
80  | 0  |     } else { | 
81  | 0  |         *result_capacity = scratch_capacity;  | 
82  | 0  |         return scratch;  | 
83  | 0  |     }  | 
84  | 0  | }  | 
85  |  |  | 
86  |  | namespace { | 
87  |  |  | 
88  |  | /**  | 
89  |  |  * uint8_t byte buffer, similar to CharString but simpler.  | 
90  |  |  */  | 
91  |  | class SortKeyLevel : public UMemory { | 
92  |  | public:  | 
93  | 0  |     SortKeyLevel() : len(0), ok(TRUE) {} | 
94  | 0  |     ~SortKeyLevel() {} | 
95  |  |  | 
96  |  |     /** @return FALSE if memory allocation failed */  | 
97  | 0  |     UBool isOk() const { return ok; } | 
98  | 0  |     UBool isEmpty() const { return len == 0; } | 
99  | 0  |     int32_t length() const { return len; } | 
100  | 0  |     const uint8_t *data() const { return buffer.getAlias(); } | 
101  | 0  |     uint8_t operator[](int32_t index) const { return buffer[index]; } | 
102  |  |  | 
103  | 0  |     uint8_t *data() { return buffer.getAlias(); } | 
104  |  |  | 
105  |  |     void appendByte(uint32_t b);  | 
106  |  |     void appendWeight16(uint32_t w);  | 
107  |  |     void appendWeight32(uint32_t w);  | 
108  |  |     void appendReverseWeight16(uint32_t w);  | 
109  |  |  | 
110  |  |     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */  | 
111  | 0  |     void appendTo(ByteSink &sink) const { | 
112  | 0  |         U_ASSERT(len > 0 && buffer[len - 1] == 1);  | 
113  | 0  |         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);  | 
114  | 0  |     }  | 
115  |  |  | 
116  |  | private:  | 
117  |  |     MaybeStackArray<uint8_t, 40> buffer;  | 
118  |  |     int32_t len;  | 
119  |  |     UBool ok;  | 
120  |  |  | 
121  |  |     UBool ensureCapacity(int32_t appendCapacity);  | 
122  |  |  | 
123  |  |     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class  | 
124  |  |     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class  | 
125  |  | };  | 
126  |  |  | 
127  | 0  | void SortKeyLevel::appendByte(uint32_t b) { | 
128  | 0  |     if(len < buffer.getCapacity() || ensureCapacity(1)) { | 
129  | 0  |         buffer[len++] = (uint8_t)b;  | 
130  | 0  |     }  | 
131  | 0  | }  | 
132  |  |  | 
133  |  | void  | 
134  | 0  | SortKeyLevel::appendWeight16(uint32_t w) { | 
135  | 0  |     U_ASSERT((w & 0xffff) != 0);  | 
136  | 0  |     uint8_t b0 = (uint8_t)(w >> 8);  | 
137  | 0  |     uint8_t b1 = (uint8_t)w;  | 
138  | 0  |     int32_t appendLength = (b1 == 0) ? 1 : 2;  | 
139  | 0  |     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) { | 
140  | 0  |         buffer[len++] = b0;  | 
141  | 0  |         if(b1 != 0) { | 
142  | 0  |             buffer[len++] = b1;  | 
143  | 0  |         }  | 
144  | 0  |     }  | 
145  | 0  | }  | 
146  |  |  | 
147  |  | void  | 
148  | 0  | SortKeyLevel::appendWeight32(uint32_t w) { | 
149  | 0  |     U_ASSERT(w != 0);  | 
150  | 0  |     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w }; | 
151  | 0  |     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;  | 
152  | 0  |     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) { | 
153  | 0  |         buffer[len++] = bytes[0];  | 
154  | 0  |         if(bytes[1] != 0) { | 
155  | 0  |             buffer[len++] = bytes[1];  | 
156  | 0  |             if(bytes[2] != 0) { | 
157  | 0  |                 buffer[len++] = bytes[2];  | 
158  | 0  |                 if(bytes[3] != 0) { | 
159  | 0  |                     buffer[len++] = bytes[3];  | 
160  | 0  |                 }  | 
161  | 0  |             }  | 
162  | 0  |         }  | 
163  | 0  |     }  | 
164  | 0  | }  | 
165  |  |  | 
166  |  | void  | 
167  | 0  | SortKeyLevel::appendReverseWeight16(uint32_t w) { | 
168  | 0  |     U_ASSERT((w & 0xffff) != 0);  | 
169  | 0  |     uint8_t b0 = (uint8_t)(w >> 8);  | 
170  | 0  |     uint8_t b1 = (uint8_t)w;  | 
171  | 0  |     int32_t appendLength = (b1 == 0) ? 1 : 2;  | 
172  | 0  |     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) { | 
173  | 0  |         if(b1 == 0) { | 
174  | 0  |             buffer[len++] = b0;  | 
175  | 0  |         } else { | 
176  | 0  |             buffer[len] = b1;  | 
177  | 0  |             buffer[len + 1] = b0;  | 
178  | 0  |             len += 2;  | 
179  | 0  |         }  | 
180  | 0  |     }  | 
181  | 0  | }  | 
182  |  |  | 
183  | 0  | UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) { | 
184  | 0  |     if(!ok) { | 
185  | 0  |         return FALSE;  | 
186  | 0  |     }  | 
187  | 0  |     int32_t newCapacity = 2 * buffer.getCapacity();  | 
188  | 0  |     int32_t altCapacity = len + 2 * appendCapacity;  | 
189  | 0  |     if (newCapacity < altCapacity) { | 
190  | 0  |         newCapacity = altCapacity;  | 
191  | 0  |     }  | 
192  | 0  |     if (newCapacity < 200) { | 
193  | 0  |         newCapacity = 200;  | 
194  | 0  |     }  | 
195  | 0  |     if(buffer.resize(newCapacity, len)==NULL) { | 
196  | 0  |         return ok = FALSE;  | 
197  | 0  |     }  | 
198  | 0  |     return TRUE;  | 
199  | 0  | }  | 
200  |  |  | 
201  |  | }  // namespace  | 
202  |  |  | 
203  | 0  | CollationKeys::LevelCallback::~LevelCallback() {} | 
204  |  |  | 
205  |  | UBool  | 
206  | 0  | CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; } | 
207  |  |  | 
208  |  | /**  | 
209  |  |  * Map from collation strength (UColAttributeValue)  | 
210  |  |  * to a mask of Collation::Level bits up to that strength,  | 
211  |  |  * excluding the CASE_LEVEL which is independent of the strength,  | 
212  |  |  * and excluding IDENTICAL_LEVEL which this function does not write.  | 
213  |  |  */  | 
214  |  | static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = { | 
215  |  |     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL  | 
216  |  |     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL  | 
217  |  |     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL  | 
218  |  |     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL  | 
219  |  |     0, 0, 0, 0,  | 
220  |  |     0, 0, 0, 0,  | 
221  |  |     0, 0, 0,  | 
222  |  |     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL  | 
223  |  | };  | 
224  |  |  | 
225  |  | void  | 
226  |  | CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,  | 
227  |  |                                           const UBool *compressibleBytes,  | 
228  |  |                                           const CollationSettings &settings,  | 
229  |  |                                           SortKeyByteSink &sink,  | 
230  |  |                                           Collation::Level minLevel, LevelCallback &callback,  | 
231  | 0  |                                           UBool preflight, UErrorCode &errorCode) { | 
232  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
233  |  |  | 
234  | 0  |     int32_t options = settings.options;  | 
235  |  |     // Set of levels to process and write.  | 
236  | 0  |     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];  | 
237  | 0  |     if((options & CollationSettings::CASE_LEVEL) != 0) { | 
238  | 0  |         levels |= Collation::CASE_LEVEL_FLAG;  | 
239  | 0  |     }  | 
240  |  |     // Minus the levels below minLevel.  | 
241  | 0  |     levels &= ~(((uint32_t)1 << minLevel) - 1);  | 
242  | 0  |     if(levels == 0) { return; } | 
243  |  |  | 
244  | 0  |     uint32_t variableTop;  | 
245  | 0  |     if((options & CollationSettings::ALTERNATE_MASK) == 0) { | 
246  | 0  |         variableTop = 0;  | 
247  | 0  |     } else { | 
248  |  |         // +1 so that we can use "<" and primary ignorables test out early.  | 
249  | 0  |         variableTop = settings.variableTop + 1;  | 
250  | 0  |     }  | 
251  |  | 
  | 
252  | 0  |     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);  | 
253  |  | 
  | 
254  | 0  |     SortKeyLevel cases;  | 
255  | 0  |     SortKeyLevel secondaries;  | 
256  | 0  |     SortKeyLevel tertiaries;  | 
257  | 0  |     SortKeyLevel quaternaries;  | 
258  |  | 
  | 
259  | 0  |     uint32_t prevReorderedPrimary = 0;  // 0==no compression  | 
260  | 0  |     int32_t commonCases = 0;  | 
261  | 0  |     int32_t commonSecondaries = 0;  | 
262  | 0  |     int32_t commonTertiaries = 0;  | 
263  | 0  |     int32_t commonQuaternaries = 0;  | 
264  |  | 
  | 
265  | 0  |     uint32_t prevSecondary = 0;  | 
266  | 0  |     int32_t secSegmentStart = 0;  | 
267  |  | 
  | 
268  | 0  |     for(;;) { | 
269  |  |         // No need to keep all CEs in the buffer when we write a sort key.  | 
270  | 0  |         iter.clearCEsIfNoneRemaining();  | 
271  | 0  |         int64_t ce = iter.nextCE(errorCode);  | 
272  | 0  |         uint32_t p = (uint32_t)(ce >> 32);  | 
273  | 0  |         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) { | 
274  |  |             // Variable CE, shift it to quaternary level.  | 
275  |  |             // Ignore all following primary ignorables, and shift further variable CEs.  | 
276  | 0  |             if(commonQuaternaries != 0) { | 
277  | 0  |                 --commonQuaternaries;  | 
278  | 0  |                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { | 
279  | 0  |                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);  | 
280  | 0  |                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;  | 
281  | 0  |                 }  | 
282  |  |                 // Shifted primary weights are lower than the common weight.  | 
283  | 0  |                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);  | 
284  | 0  |                 commonQuaternaries = 0;  | 
285  | 0  |             }  | 
286  | 0  |             do { | 
287  | 0  |                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) { | 
288  | 0  |                     if(settings.hasReordering()) { | 
289  | 0  |                         p = settings.reorder(p);  | 
290  | 0  |                     }  | 
291  | 0  |                     if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) { | 
292  |  |                         // Prevent shifted primary lead bytes from  | 
293  |  |                         // overlapping with the common compression range.  | 
294  | 0  |                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);  | 
295  | 0  |                     }  | 
296  | 0  |                     quaternaries.appendWeight32(p);  | 
297  | 0  |                 }  | 
298  | 0  |                 do { | 
299  | 0  |                     ce = iter.nextCE(errorCode);  | 
300  | 0  |                     p = (uint32_t)(ce >> 32);  | 
301  | 0  |                 } while(p == 0);  | 
302  | 0  |             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);  | 
303  | 0  |         }  | 
304  |  |         // ce could be primary ignorable, or NO_CE, or the merge separator,  | 
305  |  |         // or a regular primary CE, but it is not variable.  | 
306  |  |         // If ce==NO_CE, then write nothing for the primary level but  | 
307  |  |         // terminate compression on all levels and then exit the loop.  | 
308  | 0  |         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) { | 
309  |  |             // Test the un-reordered primary for compressibility.  | 
310  | 0  |             UBool isCompressible = compressibleBytes[p >> 24];  | 
311  | 0  |             if(settings.hasReordering()) { | 
312  | 0  |                 p = settings.reorder(p);  | 
313  | 0  |             }  | 
314  | 0  |             uint32_t p1 = p >> 24;  | 
315  | 0  |             if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) { | 
316  | 0  |                 if(prevReorderedPrimary != 0) { | 
317  | 0  |                     if(p < prevReorderedPrimary) { | 
318  |  |                         // No primary compression terminator  | 
319  |  |                         // at the end of the level or merged segment.  | 
320  | 0  |                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) { | 
321  | 0  |                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);  | 
322  | 0  |                         }  | 
323  | 0  |                     } else { | 
324  | 0  |                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);  | 
325  | 0  |                     }  | 
326  | 0  |                 }  | 
327  | 0  |                 sink.Append(p1);  | 
328  | 0  |                 if(isCompressible) { | 
329  | 0  |                     prevReorderedPrimary = p;  | 
330  | 0  |                 } else { | 
331  | 0  |                     prevReorderedPrimary = 0;  | 
332  | 0  |                 }  | 
333  | 0  |             }  | 
334  | 0  |             char p2 = (char)(p >> 16);  | 
335  | 0  |             if(p2 != 0) { | 
336  | 0  |                 char buffer[3] = { p2, (char)(p >> 8), (char)p }; | 
337  | 0  |                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);  | 
338  | 0  |             }  | 
339  |  |             // Optimization for internalNextSortKeyPart():  | 
340  |  |             // When the primary level overflows we can stop because we need not  | 
341  |  |             // calculate (preflight) the whole sort key length.  | 
342  | 0  |             if(!preflight && sink.Overflowed()) { | 
343  | 0  |                 if(U_SUCCESS(errorCode) && !sink.IsOk()) { | 
344  | 0  |                     errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
345  | 0  |                 }  | 
346  | 0  |                 return;  | 
347  | 0  |             }  | 
348  | 0  |         }  | 
349  |  |  | 
350  | 0  |         uint32_t lower32 = (uint32_t)ce;  | 
351  | 0  |         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary | 
352  |  |  | 
353  | 0  |         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) { | 
354  | 0  |             uint32_t s = lower32 >> 16;  | 
355  | 0  |             if(s == 0) { | 
356  |  |                 // secondary ignorable  | 
357  | 0  |             } else if(s == Collation::COMMON_WEIGHT16 &&  | 
358  | 0  |                     ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||  | 
359  | 0  |                         p != Collation::MERGE_SEPARATOR_PRIMARY)) { | 
360  |  |                 // s is a common secondary weight, and  | 
361  |  |                 // backwards-secondary is off or the ce is not the merge separator.  | 
362  | 0  |                 ++commonSecondaries;  | 
363  | 0  |             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) { | 
364  | 0  |                 if(commonSecondaries != 0) { | 
365  | 0  |                     --commonSecondaries;  | 
366  | 0  |                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) { | 
367  | 0  |                         secondaries.appendByte(SEC_COMMON_MIDDLE);  | 
368  | 0  |                         commonSecondaries -= SEC_COMMON_MAX_COUNT;  | 
369  | 0  |                     }  | 
370  | 0  |                     uint32_t b;  | 
371  | 0  |                     if(s < Collation::COMMON_WEIGHT16) { | 
372  | 0  |                         b = SEC_COMMON_LOW + commonSecondaries;  | 
373  | 0  |                     } else { | 
374  | 0  |                         b = SEC_COMMON_HIGH - commonSecondaries;  | 
375  | 0  |                     }  | 
376  | 0  |                     secondaries.appendByte(b);  | 
377  | 0  |                     commonSecondaries = 0;  | 
378  | 0  |                 }  | 
379  | 0  |                 secondaries.appendWeight16(s);  | 
380  | 0  |             } else { | 
381  | 0  |                 if(commonSecondaries != 0) { | 
382  | 0  |                     --commonSecondaries;  | 
383  |  |                     // Append reverse weights. The level will be re-reversed later.  | 
384  | 0  |                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;  | 
385  | 0  |                     uint32_t b;  | 
386  | 0  |                     if(prevSecondary < Collation::COMMON_WEIGHT16) { | 
387  | 0  |                         b = SEC_COMMON_LOW + remainder;  | 
388  | 0  |                     } else { | 
389  | 0  |                         b = SEC_COMMON_HIGH - remainder;  | 
390  | 0  |                     }  | 
391  | 0  |                     secondaries.appendByte(b);  | 
392  | 0  |                     commonSecondaries -= remainder;  | 
393  |  |                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.  | 
394  | 0  |                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT | 
395  | 0  |                         secondaries.appendByte(SEC_COMMON_MIDDLE);  | 
396  | 0  |                         commonSecondaries -= SEC_COMMON_MAX_COUNT;  | 
397  | 0  |                     }  | 
398  |  |                     // commonSecondaries == 0  | 
399  | 0  |                 }  | 
400  | 0  |                 if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) { | 
401  |  |                     // The backwards secondary level compares secondary weights backwards  | 
402  |  |                     // within segments separated by the merge separator (U+FFFE).  | 
403  | 0  |                     uint8_t *secs = secondaries.data();  | 
404  | 0  |                     int32_t last = secondaries.length() - 1;  | 
405  | 0  |                     if(secSegmentStart < last) { | 
406  | 0  |                         uint8_t *q = secs + secSegmentStart;  | 
407  | 0  |                         uint8_t *r = secs + last;  | 
408  | 0  |                         do { | 
409  | 0  |                             uint8_t b = *q;  | 
410  | 0  |                             *q++ = *r;  | 
411  | 0  |                             *r-- = b;  | 
412  | 0  |                         } while(q < r);  | 
413  | 0  |                     }  | 
414  | 0  |                     secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?  | 
415  | 0  |                         Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);  | 
416  | 0  |                     prevSecondary = 0;  | 
417  | 0  |                     secSegmentStart = secondaries.length();  | 
418  | 0  |                 } else { | 
419  | 0  |                     secondaries.appendReverseWeight16(s);  | 
420  | 0  |                     prevSecondary = s;  | 
421  | 0  |                 }  | 
422  | 0  |             }  | 
423  | 0  |         }  | 
424  |  | 
  | 
425  | 0  |         if((levels & Collation::CASE_LEVEL_FLAG) != 0) { | 
426  | 0  |             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?  | 
427  | 0  |                     p == 0 : lower32 <= 0xffff) { | 
428  |  |                 // Primary+caseLevel: Ignore case level weights of primary ignorables.  | 
429  |  |                 // Otherwise: Ignore case level weights of secondary ignorables.  | 
430  |  |                 // For details see the comments in the CollationCompare class.  | 
431  | 0  |             } else { | 
432  | 0  |                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte  | 
433  | 0  |                 U_ASSERT((c & 0xc0) != 0xc0);  | 
434  | 0  |                 if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) { | 
435  | 0  |                     ++commonCases;  | 
436  | 0  |                 } else { | 
437  | 0  |                     if((options & CollationSettings::UPPER_FIRST) == 0) { | 
438  |  |                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.  | 
439  |  |                         // If there are only common (=lowest) weights in the whole level,  | 
440  |  |                         // then we need not write anything.  | 
441  |  |                         // Level length differences are handled already on the next-higher level.  | 
442  | 0  |                         if(commonCases != 0 &&  | 
443  | 0  |                                 (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) { | 
444  | 0  |                             --commonCases;  | 
445  | 0  |                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) { | 
446  | 0  |                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);  | 
447  | 0  |                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;  | 
448  | 0  |                             }  | 
449  | 0  |                             uint32_t b;  | 
450  | 0  |                             if(c <= Collation::LEVEL_SEPARATOR_BYTE) { | 
451  | 0  |                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;  | 
452  | 0  |                             } else { | 
453  | 0  |                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;  | 
454  | 0  |                             }  | 
455  | 0  |                             cases.appendByte(b << 4);  | 
456  | 0  |                             commonCases = 0;  | 
457  | 0  |                         }  | 
458  | 0  |                         if(c > Collation::LEVEL_SEPARATOR_BYTE) { | 
459  | 0  |                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15  | 
460  | 0  |                         }  | 
461  | 0  |                     } else { | 
462  |  |                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.  | 
463  |  |                         // The compressed common case weights only go up from the "low" value  | 
464  |  |                         // because with upperFirst the common weight is the highest one.  | 
465  | 0  |                         if(commonCases != 0) { | 
466  | 0  |                             --commonCases;  | 
467  | 0  |                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) { | 
468  | 0  |                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);  | 
469  | 0  |                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;  | 
470  | 0  |                             }  | 
471  | 0  |                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);  | 
472  | 0  |                             commonCases = 0;  | 
473  | 0  |                         }  | 
474  | 0  |                         if(c > Collation::LEVEL_SEPARATOR_BYTE) { | 
475  | 0  |                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1  | 
476  | 0  |                         }  | 
477  | 0  |                     }  | 
478  |  |                     // c is a separator byte 01,  | 
479  |  |                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.  | 
480  | 0  |                     cases.appendByte(c);  | 
481  | 0  |                 }  | 
482  | 0  |             }  | 
483  | 0  |         }  | 
484  |  | 
  | 
485  | 0  |         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) { | 
486  | 0  |             uint32_t t = lower32 & tertiaryMask;  | 
487  | 0  |             U_ASSERT((lower32 & 0xc000) != 0xc000);  | 
488  | 0  |             if(t == Collation::COMMON_WEIGHT16) { | 
489  | 0  |                 ++commonTertiaries;  | 
490  | 0  |             } else if((tertiaryMask & 0x8000) == 0) { | 
491  |  |                 // Tertiary weights without case bits.  | 
492  |  |                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.  | 
493  | 0  |                 if(commonTertiaries != 0) { | 
494  | 0  |                     --commonTertiaries;  | 
495  | 0  |                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) { | 
496  | 0  |                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);  | 
497  | 0  |                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;  | 
498  | 0  |                     }  | 
499  | 0  |                     uint32_t b;  | 
500  | 0  |                     if(t < Collation::COMMON_WEIGHT16) { | 
501  | 0  |                         b = TER_ONLY_COMMON_LOW + commonTertiaries;  | 
502  | 0  |                     } else { | 
503  | 0  |                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;  | 
504  | 0  |                     }  | 
505  | 0  |                     tertiaries.appendByte(b);  | 
506  | 0  |                     commonTertiaries = 0;  | 
507  | 0  |                 }  | 
508  | 0  |                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; } | 
509  | 0  |                 tertiaries.appendWeight16(t);  | 
510  | 0  |             } else if((options & CollationSettings::UPPER_FIRST) == 0) { | 
511  |  |                 // Tertiary weights with caseFirst=lowerFirst.  | 
512  |  |                 // Move lead bytes 06..BF to 46..FF for the common-weight range.  | 
513  | 0  |                 if(commonTertiaries != 0) { | 
514  | 0  |                     --commonTertiaries;  | 
515  | 0  |                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) { | 
516  | 0  |                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);  | 
517  | 0  |                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;  | 
518  | 0  |                     }  | 
519  | 0  |                     uint32_t b;  | 
520  | 0  |                     if(t < Collation::COMMON_WEIGHT16) { | 
521  | 0  |                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;  | 
522  | 0  |                     } else { | 
523  | 0  |                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;  | 
524  | 0  |                     }  | 
525  | 0  |                     tertiaries.appendByte(b);  | 
526  | 0  |                     commonTertiaries = 0;  | 
527  | 0  |                 }  | 
528  | 0  |                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; } | 
529  | 0  |                 tertiaries.appendWeight16(t);  | 
530  | 0  |             } else { | 
531  |  |                 // Tertiary weights with caseFirst=upperFirst.  | 
532  |  |                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),  | 
533  |  |                 // to keep tertiary CEs well-formed.  | 
534  |  |                 // Their case+tertiary weights must be greater than those of  | 
535  |  |                 // primary and secondary CEs.  | 
536  |  |                 //  | 
537  |  |                 // Separator         01 -> 01      (unchanged)  | 
538  |  |                 // Lowercase     02..04 -> 82..84  (includes uncased)  | 
539  |  |                 // Common weight     05 -> 85..C5  (common-weight compression range)  | 
540  |  |                 // Lowercase     06..3F -> C6..FF  | 
541  |  |                 // Mixed case    42..7F -> 42..7F  | 
542  |  |                 // Uppercase     82..BF -> 02..3F  | 
543  |  |                 // Tertiary CE   86..BF -> C6..FF  | 
544  | 0  |                 if(t <= Collation::NO_CE_WEIGHT16) { | 
545  |  |                     // Keep separators unchanged.  | 
546  | 0  |                 } else if(lower32 > 0xffff) { | 
547  |  |                     // Invert case bits of primary & secondary CEs.  | 
548  | 0  |                     t ^= 0xc000;  | 
549  | 0  |                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) { | 
550  | 0  |                         t -= 0x4000;  | 
551  | 0  |                     }  | 
552  | 0  |                 } else { | 
553  |  |                     // Keep uppercase bits of tertiary CEs.  | 
554  | 0  |                     U_ASSERT(0x8600 <= t && t <= 0xbfff);  | 
555  | 0  |                     t += 0x4000;  | 
556  | 0  |                 }  | 
557  | 0  |                 if(commonTertiaries != 0) { | 
558  | 0  |                     --commonTertiaries;  | 
559  | 0  |                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) { | 
560  | 0  |                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);  | 
561  | 0  |                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;  | 
562  | 0  |                     }  | 
563  | 0  |                     uint32_t b;  | 
564  | 0  |                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) { | 
565  | 0  |                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;  | 
566  | 0  |                     } else { | 
567  | 0  |                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;  | 
568  | 0  |                     }  | 
569  | 0  |                     tertiaries.appendByte(b);  | 
570  | 0  |                     commonTertiaries = 0;  | 
571  | 0  |                 }  | 
572  | 0  |                 tertiaries.appendWeight16(t);  | 
573  | 0  |             }  | 
574  | 0  |         }  | 
575  |  | 
  | 
576  | 0  |         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) { | 
577  | 0  |             uint32_t q = lower32 & 0xffff;  | 
578  | 0  |             if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) { | 
579  | 0  |                 ++commonQuaternaries;  | 
580  | 0  |             } else if(q == Collation::NO_CE_WEIGHT16 &&  | 
581  | 0  |                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&  | 
582  | 0  |                     quaternaries.isEmpty()) { | 
583  |  |                 // If alternate=non-ignorable and there are only common quaternary weights,  | 
584  |  |                 // then we need not write anything.  | 
585  |  |                 // The only weights greater than the merge separator and less than the common weight  | 
586  |  |                 // are shifted primary weights, which are not generated for alternate=non-ignorable.  | 
587  |  |                 // There are also exactly as many quaternary weights as tertiary weights,  | 
588  |  |                 // so level length differences are handled already on tertiary level.  | 
589  |  |                 // Any above-common quaternary weight will compare greater regardless.  | 
590  | 0  |                 quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);  | 
591  | 0  |             } else { | 
592  | 0  |                 if(q == Collation::NO_CE_WEIGHT16) { | 
593  | 0  |                     q = Collation::LEVEL_SEPARATOR_BYTE;  | 
594  | 0  |                 } else { | 
595  | 0  |                     q = 0xfc + ((q >> 6) & 3);  | 
596  | 0  |                 }  | 
597  | 0  |                 if(commonQuaternaries != 0) { | 
598  | 0  |                     --commonQuaternaries;  | 
599  | 0  |                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { | 
600  | 0  |                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);  | 
601  | 0  |                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;  | 
602  | 0  |                     }  | 
603  | 0  |                     uint32_t b;  | 
604  | 0  |                     if(q < QUAT_COMMON_LOW) { | 
605  | 0  |                         b = QUAT_COMMON_LOW + commonQuaternaries;  | 
606  | 0  |                     } else { | 
607  | 0  |                         b = QUAT_COMMON_HIGH - commonQuaternaries;  | 
608  | 0  |                     }  | 
609  | 0  |                     quaternaries.appendByte(b);  | 
610  | 0  |                     commonQuaternaries = 0;  | 
611  | 0  |                 }  | 
612  | 0  |                 quaternaries.appendByte(q);  | 
613  | 0  |             }  | 
614  | 0  |         }  | 
615  |  | 
  | 
616  | 0  |         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE | 
617  | 0  |     }  | 
618  |  |  | 
619  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
620  |  |  | 
621  |  |     // Append the beyond-primary levels.  | 
622  | 0  |     UBool ok = TRUE;  | 
623  | 0  |     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) { | 
624  | 0  |         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; } | 
625  | 0  |         ok &= secondaries.isOk();  | 
626  | 0  |         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);  | 
627  | 0  |         secondaries.appendTo(sink);  | 
628  | 0  |     }  | 
629  |  |  | 
630  | 0  |     if((levels & Collation::CASE_LEVEL_FLAG) != 0) { | 
631  | 0  |         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; } | 
632  | 0  |         ok &= cases.isOk();  | 
633  | 0  |         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);  | 
634  |  |         // Write pairs of nibbles as bytes, except separator bytes as themselves.  | 
635  | 0  |         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.  | 
636  | 0  |         uint8_t b = 0;  | 
637  | 0  |         for(int32_t i = 0; i < length; ++i) { | 
638  | 0  |             uint8_t c = (uint8_t)cases[i];  | 
639  | 0  |             U_ASSERT((c & 0xf) == 0 && c != 0);  | 
640  | 0  |             if(b == 0) { | 
641  | 0  |                 b = c;  | 
642  | 0  |             } else { | 
643  | 0  |                 sink.Append(b | (c >> 4));  | 
644  | 0  |                 b = 0;  | 
645  | 0  |             }  | 
646  | 0  |         }  | 
647  | 0  |         if(b != 0) { | 
648  | 0  |             sink.Append(b);  | 
649  | 0  |         }  | 
650  | 0  |     }  | 
651  |  |  | 
652  | 0  |     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) { | 
653  | 0  |         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; } | 
654  | 0  |         ok &= tertiaries.isOk();  | 
655  | 0  |         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);  | 
656  | 0  |         tertiaries.appendTo(sink);  | 
657  | 0  |     }  | 
658  |  |  | 
659  | 0  |     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) { | 
660  | 0  |         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; } | 
661  | 0  |         ok &= quaternaries.isOk();  | 
662  | 0  |         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);  | 
663  | 0  |         quaternaries.appendTo(sink);  | 
664  | 0  |     }  | 
665  |  |  | 
666  | 0  |     if(!ok || !sink.IsOk()) { | 
667  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
668  | 0  |     }  | 
669  | 0  | }  | 
670  |  |  | 
671  |  | U_NAMESPACE_END  | 
672  |  |  | 
673  |  | #endif  // !UCONFIG_NO_COLLATION  |