/src/icu/source/i18n/collationdatabuilder.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2012-2015, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collationdatabuilder.cpp  | 
9  |  | *  | 
10  |  | * (replaced the former ucol_elm.cpp)  | 
11  |  | *  | 
12  |  | * created on: 2012apr01  | 
13  |  | * created by: Markus W. Scherer  | 
14  |  | */  | 
15  |  |  | 
16  |  | #include "unicode/utypes.h"  | 
17  |  |  | 
18  |  | #if !UCONFIG_NO_COLLATION  | 
19  |  |  | 
20  |  | #include "unicode/localpointer.h"  | 
21  |  | #include "unicode/uchar.h"  | 
22  |  | #include "unicode/ucharstrie.h"  | 
23  |  | #include "unicode/ucharstriebuilder.h"  | 
24  |  | #include "unicode/uniset.h"  | 
25  |  | #include "unicode/unistr.h"  | 
26  |  | #include "unicode/usetiter.h"  | 
27  |  | #include "unicode/utf16.h"  | 
28  |  | #include "cmemory.h"  | 
29  |  | #include "collation.h"  | 
30  |  | #include "collationdata.h"  | 
31  |  | #include "collationdatabuilder.h"  | 
32  |  | #include "collationfastlatinbuilder.h"  | 
33  |  | #include "collationiterator.h"  | 
34  |  | #include "normalizer2impl.h"  | 
35  |  | #include "utrie2.h"  | 
36  |  | #include "uvectr32.h"  | 
37  |  | #include "uvectr64.h"  | 
38  |  | #include "uvector.h"  | 
39  |  |  | 
40  |  | U_NAMESPACE_BEGIN  | 
41  |  |  | 
42  | 0  | CollationDataBuilder::CEModifier::~CEModifier() {} | 
43  |  |  | 
44  |  | /**  | 
45  |  |  * Build-time context and CE32 for a code point.  | 
46  |  |  * If a code point has contextual mappings, then the default (no-context) mapping  | 
47  |  |  * and all conditional mappings are stored in a singly-linked list  | 
48  |  |  * of ConditionalCE32, sorted by context strings.  | 
49  |  |  *  | 
50  |  |  * Context strings sort by prefix length, then by prefix, then by contraction suffix.  | 
51  |  |  * Context strings must be unique and in ascending order.  | 
52  |  |  */  | 
53  |  | struct ConditionalCE32 : public UMemory { | 
54  |  |     ConditionalCE32()  | 
55  | 0  |             : context(),  | 
56  | 0  |               ce32(0), defaultCE32(Collation::NO_CE32), builtCE32(Collation::NO_CE32),  | 
57  | 0  |               next(-1) {} | 
58  |  |     ConditionalCE32(const UnicodeString &ct, uint32_t ce)  | 
59  | 0  |             : context(ct),  | 
60  | 0  |               ce32(ce), defaultCE32(Collation::NO_CE32), builtCE32(Collation::NO_CE32),  | 
61  | 0  |               next(-1) {} | 
62  |  |  | 
63  | 0  |     inline UBool hasContext() const { return context.length() > 1; } | 
64  | 0  |     inline int32_t prefixLength() const { return context.charAt(0); } | 
65  |  |  | 
66  |  |     /**  | 
67  |  |      * "\0" for the first entry for any code point, with its default CE32.  | 
68  |  |      *  | 
69  |  |      * Otherwise one unit with the length of the prefix string,  | 
70  |  |      * then the prefix string, then the contraction suffix.  | 
71  |  |      */  | 
72  |  |     UnicodeString context;  | 
73  |  |     /**  | 
74  |  |      * CE32 for the code point and its context.  | 
75  |  |      * Can be special (e.g., for an expansion) but not contextual (prefix or contraction tag).  | 
76  |  |      */  | 
77  |  |     uint32_t ce32;  | 
78  |  |     /**  | 
79  |  |      * Default CE32 for all contexts with this same prefix.  | 
80  |  |      * Initially NO_CE32. Set only while building runtime data structures,  | 
81  |  |      * and only on one of the nodes of a sub-list with the same prefix.  | 
82  |  |      */  | 
83  |  |     uint32_t defaultCE32;  | 
84  |  |     /**  | 
85  |  |      * CE32 for the built contexts.  | 
86  |  |      * When fetching CEs from the builder, the contexts are built into their runtime form  | 
87  |  |      * so that the normal collation implementation can process them.  | 
88  |  |      * The result is cached in the list head. It is reset when the contexts are modified.  | 
89  |  |      */  | 
90  |  |     uint32_t builtCE32;  | 
91  |  |     /**  | 
92  |  |      * Index of the next ConditionalCE32.  | 
93  |  |      * Negative for the end of the list.  | 
94  |  |      */  | 
95  |  |     int32_t next;  | 
96  |  | };  | 
97  |  |  | 
98  |  | U_CDECL_BEGIN  | 
99  |  |  | 
100  |  | U_CAPI void U_CALLCONV  | 
101  | 0  | uprv_deleteConditionalCE32(void *obj) { | 
102  | 0  |     delete static_cast<ConditionalCE32 *>(obj);  | 
103  | 0  | }  | 
104  |  |  | 
105  |  | U_CDECL_END  | 
106  |  |  | 
107  |  | /**  | 
108  |  |  * Build-time collation element and character iterator.  | 
109  |  |  * Uses the runtime CollationIterator for fetching CEs for a string  | 
110  |  |  * but reads from the builder's unfinished data structures.  | 
111  |  |  * In particular, this class reads from the unfinished trie  | 
112  |  |  * and has to avoid CollationIterator::nextCE() and redirect other  | 
113  |  |  * calls to data->getCE32() and data->getCE32FromSupplementary().  | 
114  |  |  *  | 
115  |  |  * We do this so that we need not implement the collation algorithm  | 
116  |  |  * again for the builder and make it behave exactly like the runtime code.  | 
117  |  |  * That would be more difficult to test and maintain than this indirection.  | 
118  |  |  *  | 
119  |  |  * Some CE32 tags (for example, the DIGIT_TAG) do not occur in the builder data,  | 
120  |  |  * so the data accesses from those code paths need not be modified.  | 
121  |  |  *  | 
122  |  |  * This class iterates directly over whole code points  | 
123  |  |  * so that the CollationIterator does not need the finished trie  | 
124  |  |  * for handling the LEAD_SURROGATE_TAG.  | 
125  |  |  */  | 
126  |  | class DataBuilderCollationIterator : public CollationIterator { | 
127  |  | public:  | 
128  |  |     DataBuilderCollationIterator(CollationDataBuilder &b);  | 
129  |  |  | 
130  |  |     virtual ~DataBuilderCollationIterator();  | 
131  |  |  | 
132  |  |     int32_t fetchCEs(const UnicodeString &str, int32_t start, int64_t ces[], int32_t cesLength);  | 
133  |  |  | 
134  |  |     virtual void resetToOffset(int32_t newOffset);  | 
135  |  |     virtual int32_t getOffset() const;  | 
136  |  |  | 
137  |  |     virtual UChar32 nextCodePoint(UErrorCode &errorCode);  | 
138  |  |     virtual UChar32 previousCodePoint(UErrorCode &errorCode);  | 
139  |  |  | 
140  |  | protected:  | 
141  |  |     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);  | 
142  |  |     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);  | 
143  |  |  | 
144  |  |     virtual uint32_t getDataCE32(UChar32 c) const;  | 
145  |  |     virtual uint32_t getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode);  | 
146  |  |  | 
147  |  |     CollationDataBuilder &builder;  | 
148  |  |     CollationData builderData;  | 
149  |  |     uint32_t jamoCE32s[CollationData::JAMO_CE32S_LENGTH];  | 
150  |  |     const UnicodeString *s;  | 
151  |  |     int32_t pos;  | 
152  |  | };  | 
153  |  |  | 
154  |  | DataBuilderCollationIterator::DataBuilderCollationIterator(CollationDataBuilder &b)  | 
155  | 0  |         : CollationIterator(&builderData, /*numeric=*/ FALSE),  | 
156  | 0  |           builder(b), builderData(b.nfcImpl),  | 
157  | 0  |           s(NULL), pos(0) { | 
158  | 0  |     builderData.base = builder.base;  | 
159  |  |     // Set all of the jamoCE32s[] to indirection CE32s.  | 
160  | 0  |     for(int32_t j = 0; j < CollationData::JAMO_CE32S_LENGTH; ++j) {  // Count across Jamo types. | 
161  | 0  |         UChar32 jamo = CollationDataBuilder::jamoCpFromIndex(j);  | 
162  | 0  |         jamoCE32s[j] = Collation::makeCE32FromTagAndIndex(Collation::BUILDER_DATA_TAG, jamo) |  | 
163  | 0  |                 CollationDataBuilder::IS_BUILDER_JAMO_CE32;  | 
164  | 0  |     }  | 
165  | 0  |     builderData.jamoCE32s = jamoCE32s;  | 
166  | 0  | }  | 
167  |  |  | 
168  | 0  | DataBuilderCollationIterator::~DataBuilderCollationIterator() {} | 
169  |  |  | 
170  |  | int32_t  | 
171  |  | DataBuilderCollationIterator::fetchCEs(const UnicodeString &str, int32_t start,  | 
172  | 0  |                                        int64_t ces[], int32_t cesLength) { | 
173  |  |     // Set the pointers each time, in case they changed due to reallocation.  | 
174  | 0  |     builderData.ce32s = reinterpret_cast<const uint32_t *>(builder.ce32s.getBuffer());  | 
175  | 0  |     builderData.ces = builder.ce64s.getBuffer();  | 
176  | 0  |     builderData.contexts = builder.contexts.getBuffer();  | 
177  |  |     // Modified copy of CollationIterator::nextCE() and CollationIterator::nextCEFromCE32().  | 
178  | 0  |     reset();  | 
179  | 0  |     s = &str;  | 
180  | 0  |     pos = start;  | 
181  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
182  | 0  |     while(U_SUCCESS(errorCode) && pos < s->length()) { | 
183  |  |         // No need to keep all CEs in the iterator buffer.  | 
184  | 0  |         clearCEs();  | 
185  | 0  |         UChar32 c = s->char32At(pos);  | 
186  | 0  |         pos += U16_LENGTH(c);  | 
187  | 0  |         uint32_t ce32 = utrie2_get32(builder.trie, c);  | 
188  | 0  |         const CollationData *d;  | 
189  | 0  |         if(ce32 == Collation::FALLBACK_CE32) { | 
190  | 0  |             d = builder.base;  | 
191  | 0  |             ce32 = builder.base->getCE32(c);  | 
192  | 0  |         } else { | 
193  | 0  |             d = &builderData;  | 
194  | 0  |         }  | 
195  | 0  |         appendCEsFromCE32(d, c, ce32, /*forward=*/ TRUE, errorCode);  | 
196  | 0  |         U_ASSERT(U_SUCCESS(errorCode));  | 
197  | 0  |         for(int32_t i = 0; i < getCEsLength(); ++i) { | 
198  | 0  |             int64_t ce = getCE(i);  | 
199  | 0  |             if(ce != 0) { | 
200  | 0  |                 if(cesLength < Collation::MAX_EXPANSION_LENGTH) { | 
201  | 0  |                     ces[cesLength] = ce;  | 
202  | 0  |                 }  | 
203  | 0  |                 ++cesLength;  | 
204  | 0  |             }  | 
205  | 0  |         }  | 
206  | 0  |     }  | 
207  | 0  |     return cesLength;  | 
208  | 0  | }  | 
209  |  |  | 
210  |  | void  | 
211  | 0  | DataBuilderCollationIterator::resetToOffset(int32_t newOffset) { | 
212  | 0  |     reset();  | 
213  | 0  |     pos = newOffset;  | 
214  | 0  | }  | 
215  |  |  | 
216  |  | int32_t  | 
217  | 0  | DataBuilderCollationIterator::getOffset() const { | 
218  | 0  |     return pos;  | 
219  | 0  | }  | 
220  |  |  | 
221  |  | UChar32  | 
222  | 0  | DataBuilderCollationIterator::nextCodePoint(UErrorCode & /*errorCode*/) { | 
223  | 0  |     if(pos == s->length()) { | 
224  | 0  |         return U_SENTINEL;  | 
225  | 0  |     }  | 
226  | 0  |     UChar32 c = s->char32At(pos);  | 
227  | 0  |     pos += U16_LENGTH(c);  | 
228  | 0  |     return c;  | 
229  | 0  | }  | 
230  |  |  | 
231  |  | UChar32  | 
232  | 0  | DataBuilderCollationIterator::previousCodePoint(UErrorCode & /*errorCode*/) { | 
233  | 0  |     if(pos == 0) { | 
234  | 0  |         return U_SENTINEL;  | 
235  | 0  |     }  | 
236  | 0  |     UChar32 c = s->char32At(pos - 1);  | 
237  | 0  |     pos -= U16_LENGTH(c);  | 
238  | 0  |     return c;  | 
239  | 0  | }  | 
240  |  |  | 
241  |  | void  | 
242  | 0  | DataBuilderCollationIterator::forwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) { | 
243  | 0  |     pos = s->moveIndex32(pos, num);  | 
244  | 0  | }  | 
245  |  |  | 
246  |  | void  | 
247  | 0  | DataBuilderCollationIterator::backwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) { | 
248  | 0  |     pos = s->moveIndex32(pos, -num);  | 
249  | 0  | }  | 
250  |  |  | 
251  |  | uint32_t  | 
252  | 0  | DataBuilderCollationIterator::getDataCE32(UChar32 c) const { | 
253  | 0  |     return utrie2_get32(builder.trie, c);  | 
254  | 0  | }  | 
255  |  |  | 
256  |  | uint32_t  | 
257  | 0  | DataBuilderCollationIterator::getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode) { | 
258  | 0  |     if (U_FAILURE(errorCode)) { return 0; } | 
259  | 0  |     U_ASSERT(Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG));  | 
260  | 0  |     if((ce32 & CollationDataBuilder::IS_BUILDER_JAMO_CE32) != 0) { | 
261  | 0  |         UChar32 jamo = Collation::indexFromCE32(ce32);  | 
262  | 0  |         return utrie2_get32(builder.trie, jamo);  | 
263  | 0  |     } else { | 
264  | 0  |         ConditionalCE32 *cond = builder.getConditionalCE32ForCE32(ce32);  | 
265  | 0  |         if (cond == nullptr) { | 
266  | 0  |             errorCode = U_INTERNAL_PROGRAM_ERROR;  | 
267  |  |             // TODO: ICU-21531 figure out why this happens.  | 
268  | 0  |             return 0;  | 
269  | 0  |         }  | 
270  | 0  |         if(cond->builtCE32 == Collation::NO_CE32) { | 
271  |  |             // Build the context-sensitive mappings into their runtime form and cache the result.  | 
272  | 0  |             cond->builtCE32 = builder.buildContext(cond, errorCode);  | 
273  | 0  |             if(errorCode == U_BUFFER_OVERFLOW_ERROR) { | 
274  | 0  |                 errorCode = U_ZERO_ERROR;  | 
275  | 0  |                 builder.clearContexts();  | 
276  | 0  |                 cond->builtCE32 = builder.buildContext(cond, errorCode);  | 
277  | 0  |             }  | 
278  | 0  |             builderData.contexts = builder.contexts.getBuffer();  | 
279  | 0  |         }  | 
280  | 0  |         return cond->builtCE32;  | 
281  | 0  |     }  | 
282  | 0  | }  | 
283  |  |  | 
284  |  | // ------------------------------------------------------------------------- ***  | 
285  |  |  | 
286  |  | CollationDataBuilder::CollationDataBuilder(UErrorCode &errorCode)  | 
287  | 0  |         : nfcImpl(*Normalizer2Factory::getNFCImpl(errorCode)),  | 
288  |  |           base(NULL), baseSettings(NULL),  | 
289  |  |           trie(NULL),  | 
290  | 0  |           ce32s(errorCode), ce64s(errorCode), conditionalCE32s(errorCode),  | 
291  | 0  |           modified(FALSE),  | 
292  | 0  |           fastLatinEnabled(FALSE), fastLatinBuilder(NULL),  | 
293  | 0  |           collIter(NULL) { | 
294  |  |     // Reserve the first CE32 for U+0000.  | 
295  | 0  |     ce32s.addElement(0, errorCode);  | 
296  | 0  |     conditionalCE32s.setDeleter(uprv_deleteConditionalCE32);  | 
297  | 0  | }  | 
298  |  |  | 
299  | 0  | CollationDataBuilder::~CollationDataBuilder() { | 
300  | 0  |     utrie2_close(trie);  | 
301  | 0  |     delete fastLatinBuilder;  | 
302  | 0  |     delete collIter;  | 
303  | 0  | }  | 
304  |  |  | 
305  |  | void  | 
306  | 0  | CollationDataBuilder::initForTailoring(const CollationData *b, UErrorCode &errorCode) { | 
307  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
308  | 0  |     if(trie != NULL) { | 
309  | 0  |         errorCode = U_INVALID_STATE_ERROR;  | 
310  | 0  |         return;  | 
311  | 0  |     }  | 
312  | 0  |     if(b == NULL) { | 
313  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
314  | 0  |         return;  | 
315  | 0  |     }  | 
316  | 0  |     base = b;  | 
317  |  |  | 
318  |  |     // For a tailoring, the default is to fall back to the base.  | 
319  | 0  |     trie = utrie2_open(Collation::FALLBACK_CE32, Collation::FFFD_CE32, &errorCode);  | 
320  |  |  | 
321  |  |     // Set the Latin-1 letters block so that it is allocated first in the data array,  | 
322  |  |     // to try to improve locality of reference when sorting Latin-1 text.  | 
323  |  |     // Do not use utrie2_setRange32() since that will not actually allocate blocks  | 
324  |  |     // that are filled with the default value.  | 
325  |  |     // ASCII (0..7F) is already preallocated anyway.  | 
326  | 0  |     for(UChar32 c = 0xc0; c <= 0xff; ++c) { | 
327  | 0  |         utrie2_set32(trie, c, Collation::FALLBACK_CE32, &errorCode);  | 
328  | 0  |     }  | 
329  |  |  | 
330  |  |     // Hangul syllables are not tailorable (except via tailoring Jamos).  | 
331  |  |     // Always set the Hangul tag to help performance.  | 
332  |  |     // Do this here, rather than in buildMappings(),  | 
333  |  |     // so that we see the HANGUL_TAG in various assertions.  | 
334  | 0  |     uint32_t hangulCE32 = Collation::makeCE32FromTagAndIndex(Collation::HANGUL_TAG, 0);  | 
335  | 0  |     utrie2_setRange32(trie, Hangul::HANGUL_BASE, Hangul::HANGUL_END, hangulCE32, TRUE, &errorCode);  | 
336  |  |  | 
337  |  |     // Copy the set contents but don't copy/clone the set as a whole because  | 
338  |  |     // that would copy the isFrozen state too.  | 
339  | 0  |     unsafeBackwardSet.addAll(*b->unsafeBackwardSet);  | 
340  |  | 
  | 
341  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
342  | 0  | }  | 
343  |  |  | 
344  |  | UBool  | 
345  |  | CollationDataBuilder::maybeSetPrimaryRange(UChar32 start, UChar32 end,  | 
346  |  |                                            uint32_t primary, int32_t step,  | 
347  | 0  |                                            UErrorCode &errorCode) { | 
348  | 0  |     if(U_FAILURE(errorCode)) { return FALSE; } | 
349  | 0  |     U_ASSERT(start <= end);  | 
350  |  |     // TODO: Do we need to check what values are currently set for start..end?  | 
351  |  |     // An offset range is worth it only if we can achieve an overlap between  | 
352  |  |     // adjacent UTrie2 blocks of 32 code points each.  | 
353  |  |     // An offset CE is also a little more expensive to look up and compute  | 
354  |  |     // than a simple CE.  | 
355  |  |     // If the range spans at least three UTrie2 block boundaries (> 64 code points),  | 
356  |  |     // then we take it.  | 
357  |  |     // If the range spans one or two block boundaries and there are  | 
358  |  |     // at least 4 code points on either side, then we take it.  | 
359  |  |     // (We could additionally require a minimum range length of, say, 16.)  | 
360  | 0  |     int32_t blockDelta = (end >> 5) - (start >> 5);  | 
361  | 0  |     if(2 <= step && step <= 0x7f &&  | 
362  | 0  |             (blockDelta >= 3 ||  | 
363  | 0  |             (blockDelta > 0 && (start & 0x1f) <= 0x1c && (end & 0x1f) >= 3))) { | 
364  | 0  |         int64_t dataCE = ((int64_t)primary << 32) | (start << 8) | step;  | 
365  | 0  |         if(isCompressiblePrimary(primary)) { dataCE |= 0x80; } | 
366  | 0  |         int32_t index = addCE(dataCE, errorCode);  | 
367  | 0  |         if(U_FAILURE(errorCode)) { return 0; } | 
368  | 0  |         if(index > Collation::MAX_INDEX) { | 
369  | 0  |             errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
370  | 0  |             return 0;  | 
371  | 0  |         }  | 
372  | 0  |         uint32_t offsetCE32 = Collation::makeCE32FromTagAndIndex(Collation::OFFSET_TAG, index);  | 
373  | 0  |         utrie2_setRange32(trie, start, end, offsetCE32, TRUE, &errorCode);  | 
374  | 0  |         modified = TRUE;  | 
375  | 0  |         return TRUE;  | 
376  | 0  |     } else { | 
377  | 0  |         return FALSE;  | 
378  | 0  |     }  | 
379  | 0  | }  | 
380  |  |  | 
381  |  | uint32_t  | 
382  |  | CollationDataBuilder::setPrimaryRangeAndReturnNext(UChar32 start, UChar32 end,  | 
383  |  |                                                    uint32_t primary, int32_t step,  | 
384  | 0  |                                                    UErrorCode &errorCode) { | 
385  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
386  | 0  |     UBool isCompressible = isCompressiblePrimary(primary);  | 
387  | 0  |     if(maybeSetPrimaryRange(start, end, primary, step, errorCode)) { | 
388  | 0  |         return Collation::incThreeBytePrimaryByOffset(primary, isCompressible,  | 
389  | 0  |                                                       (end - start + 1) * step);  | 
390  | 0  |     } else { | 
391  |  |         // Short range: Set individual CE32s.  | 
392  | 0  |         for(;;) { | 
393  | 0  |             utrie2_set32(trie, start, Collation::makeLongPrimaryCE32(primary), &errorCode);  | 
394  | 0  |             ++start;  | 
395  | 0  |             primary = Collation::incThreeBytePrimaryByOffset(primary, isCompressible, step);  | 
396  | 0  |             if(start > end) { return primary; } | 
397  | 0  |         }  | 
398  | 0  |         modified = TRUE;  | 
399  | 0  |     }  | 
400  | 0  | }  | 
401  |  |  | 
402  |  | uint32_t  | 
403  | 0  | CollationDataBuilder::getCE32FromOffsetCE32(UBool fromBase, UChar32 c, uint32_t ce32) const { | 
404  | 0  |     int32_t i = Collation::indexFromCE32(ce32);  | 
405  | 0  |     int64_t dataCE = fromBase ? base->ces[i] : ce64s.elementAti(i);  | 
406  | 0  |     uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);  | 
407  | 0  |     return Collation::makeLongPrimaryCE32(p);  | 
408  | 0  | }  | 
409  |  |  | 
410  |  | UBool  | 
411  | 0  | CollationDataBuilder::isCompressibleLeadByte(uint32_t b) const { | 
412  | 0  |     return base->isCompressibleLeadByte(b);  | 
413  | 0  | }  | 
414  |  |  | 
415  |  | UBool  | 
416  | 0  | CollationDataBuilder::isAssigned(UChar32 c) const { | 
417  | 0  |     return Collation::isAssignedCE32(utrie2_get32(trie, c));  | 
418  | 0  | }  | 
419  |  |  | 
420  |  | uint32_t  | 
421  | 0  | CollationDataBuilder::getLongPrimaryIfSingleCE(UChar32 c) const { | 
422  | 0  |     uint32_t ce32 = utrie2_get32(trie, c);  | 
423  | 0  |     if(Collation::isLongPrimaryCE32(ce32)) { | 
424  | 0  |         return Collation::primaryFromLongPrimaryCE32(ce32);  | 
425  | 0  |     } else { | 
426  | 0  |         return 0;  | 
427  | 0  |     }  | 
428  | 0  | }  | 
429  |  |  | 
430  |  | int64_t  | 
431  | 0  | CollationDataBuilder::getSingleCE(UChar32 c, UErrorCode &errorCode) const { | 
432  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
433  |  |     // Keep parallel with CollationData::getSingleCE().  | 
434  | 0  |     UBool fromBase = FALSE;  | 
435  | 0  |     uint32_t ce32 = utrie2_get32(trie, c);  | 
436  | 0  |     if(ce32 == Collation::FALLBACK_CE32) { | 
437  | 0  |         fromBase = TRUE;  | 
438  | 0  |         ce32 = base->getCE32(c);  | 
439  | 0  |     }  | 
440  | 0  |     while(Collation::isSpecialCE32(ce32)) { | 
441  | 0  |         switch(Collation::tagFromCE32(ce32)) { | 
442  | 0  |         case Collation::LATIN_EXPANSION_TAG:  | 
443  | 0  |         case Collation::BUILDER_DATA_TAG:  | 
444  | 0  |         case Collation::PREFIX_TAG:  | 
445  | 0  |         case Collation::CONTRACTION_TAG:  | 
446  | 0  |         case Collation::HANGUL_TAG:  | 
447  | 0  |         case Collation::LEAD_SURROGATE_TAG:  | 
448  | 0  |             errorCode = U_UNSUPPORTED_ERROR;  | 
449  | 0  |             return 0;  | 
450  | 0  |         case Collation::FALLBACK_TAG:  | 
451  | 0  |         case Collation::RESERVED_TAG_3:  | 
452  | 0  |             errorCode = U_INTERNAL_PROGRAM_ERROR;  | 
453  | 0  |             return 0;  | 
454  | 0  |         case Collation::LONG_PRIMARY_TAG:  | 
455  | 0  |             return Collation::ceFromLongPrimaryCE32(ce32);  | 
456  | 0  |         case Collation::LONG_SECONDARY_TAG:  | 
457  | 0  |             return Collation::ceFromLongSecondaryCE32(ce32);  | 
458  | 0  |         case Collation::EXPANSION32_TAG:  | 
459  | 0  |             if(Collation::lengthFromCE32(ce32) == 1) { | 
460  | 0  |                 int32_t i = Collation::indexFromCE32(ce32);  | 
461  | 0  |                 ce32 = fromBase ? base->ce32s[i] : ce32s.elementAti(i);  | 
462  | 0  |                 break;  | 
463  | 0  |             } else { | 
464  | 0  |                 errorCode = U_UNSUPPORTED_ERROR;  | 
465  | 0  |                 return 0;  | 
466  | 0  |             }  | 
467  | 0  |         case Collation::EXPANSION_TAG: { | 
468  | 0  |             if(Collation::lengthFromCE32(ce32) == 1) { | 
469  | 0  |                 int32_t i = Collation::indexFromCE32(ce32);  | 
470  | 0  |                 return fromBase ? base->ces[i] : ce64s.elementAti(i);  | 
471  | 0  |             } else { | 
472  | 0  |                 errorCode = U_UNSUPPORTED_ERROR;  | 
473  | 0  |                 return 0;  | 
474  | 0  |             }  | 
475  | 0  |         }  | 
476  | 0  |         case Collation::DIGIT_TAG:  | 
477  |  |             // Fetch the non-numeric-collation CE32 and continue.  | 
478  | 0  |             ce32 = ce32s.elementAti(Collation::indexFromCE32(ce32));  | 
479  | 0  |             break;  | 
480  | 0  |         case Collation::U0000_TAG:  | 
481  | 0  |             U_ASSERT(c == 0);  | 
482  |  |             // Fetch the normal ce32 for U+0000 and continue.  | 
483  | 0  |             ce32 = fromBase ? base->ce32s[0] : ce32s.elementAti(0);  | 
484  | 0  |             break;  | 
485  | 0  |         case Collation::OFFSET_TAG:  | 
486  | 0  |             ce32 = getCE32FromOffsetCE32(fromBase, c, ce32);  | 
487  | 0  |             break;  | 
488  | 0  |         case Collation::IMPLICIT_TAG:  | 
489  | 0  |             return Collation::unassignedCEFromCodePoint(c);  | 
490  | 0  |         }  | 
491  | 0  |     }  | 
492  | 0  |     return Collation::ceFromSimpleCE32(ce32);  | 
493  | 0  | }  | 
494  |  |  | 
495  |  | int32_t  | 
496  | 0  | CollationDataBuilder::addCE(int64_t ce, UErrorCode &errorCode) { | 
497  | 0  |     int32_t length = ce64s.size();  | 
498  | 0  |     for(int32_t i = 0; i < length; ++i) { | 
499  | 0  |         if(ce == ce64s.elementAti(i)) { return i; } | 
500  | 0  |     }  | 
501  | 0  |     ce64s.addElement(ce, errorCode);  | 
502  | 0  |     return length;  | 
503  | 0  | }  | 
504  |  |  | 
505  |  | int32_t  | 
506  | 0  | CollationDataBuilder::addCE32(uint32_t ce32, UErrorCode &errorCode) { | 
507  | 0  |     int32_t length = ce32s.size();  | 
508  | 0  |     for(int32_t i = 0; i < length; ++i) { | 
509  | 0  |         if(ce32 == (uint32_t)ce32s.elementAti(i)) { return i; } | 
510  | 0  |     }  | 
511  | 0  |     ce32s.addElement((int32_t)ce32, errorCode);    | 
512  | 0  |     return length;  | 
513  | 0  | }  | 
514  |  |  | 
515  |  | int32_t  | 
516  |  | CollationDataBuilder::addConditionalCE32(const UnicodeString &context, uint32_t ce32,  | 
517  | 0  |                                          UErrorCode &errorCode) { | 
518  | 0  |     if(U_FAILURE(errorCode)) { return -1; } | 
519  | 0  |     U_ASSERT(!context.isEmpty());  | 
520  | 0  |     int32_t index = conditionalCE32s.size();  | 
521  | 0  |     if(index > Collation::MAX_INDEX) { | 
522  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
523  | 0  |         return -1;  | 
524  | 0  |     }  | 
525  | 0  |     ConditionalCE32 *cond = new ConditionalCE32(context, ce32);  | 
526  | 0  |     if(cond == NULL) { | 
527  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
528  | 0  |         return -1;  | 
529  | 0  |     }  | 
530  | 0  |     conditionalCE32s.addElementX(cond, errorCode);  | 
531  | 0  |     return index;  | 
532  | 0  | }  | 
533  |  |  | 
534  |  | void  | 
535  |  | CollationDataBuilder::add(const UnicodeString &prefix, const UnicodeString &s,  | 
536  |  |                           const int64_t ces[], int32_t cesLength,  | 
537  | 0  |                           UErrorCode &errorCode) { | 
538  | 0  |     uint32_t ce32 = encodeCEs(ces, cesLength, errorCode);  | 
539  | 0  |     addCE32(prefix, s, ce32, errorCode);  | 
540  | 0  | }  | 
541  |  |  | 
542  |  | void  | 
543  |  | CollationDataBuilder::addCE32(const UnicodeString &prefix, const UnicodeString &s,  | 
544  | 0  |                               uint32_t ce32, UErrorCode &errorCode) { | 
545  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
546  | 0  |     if(s.isEmpty()) { | 
547  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
548  | 0  |         return;  | 
549  | 0  |     }  | 
550  | 0  |     if(trie == NULL || utrie2_isFrozen(trie)) { | 
551  | 0  |         errorCode = U_INVALID_STATE_ERROR;  | 
552  | 0  |         return;  | 
553  | 0  |     }  | 
554  | 0  |     UChar32 c = s.char32At(0);  | 
555  | 0  |     int32_t cLength = U16_LENGTH(c);  | 
556  | 0  |     uint32_t oldCE32 = utrie2_get32(trie, c);  | 
557  | 0  |     UBool hasContext = !prefix.isEmpty() || s.length() > cLength;  | 
558  | 0  |     if(oldCE32 == Collation::FALLBACK_CE32) { | 
559  |  |         // First tailoring for c.  | 
560  |  |         // If c has contextual base mappings or if we add a contextual mapping,  | 
561  |  |         // then copy the base mappings.  | 
562  |  |         // Otherwise we just override the base mapping.  | 
563  | 0  |         uint32_t baseCE32 = base->getFinalCE32(base->getCE32(c));  | 
564  | 0  |         if(hasContext || Collation::ce32HasContext(baseCE32)) { | 
565  | 0  |             oldCE32 = copyFromBaseCE32(c, baseCE32, TRUE, errorCode);  | 
566  | 0  |             utrie2_set32(trie, c, oldCE32, &errorCode);  | 
567  | 0  |             if(U_FAILURE(errorCode)) { return; } | 
568  | 0  |         }  | 
569  | 0  |     }  | 
570  | 0  |     if(!hasContext) { | 
571  |  |         // No prefix, no contraction.  | 
572  | 0  |         if(!isBuilderContextCE32(oldCE32)) { | 
573  | 0  |             utrie2_set32(trie, c, ce32, &errorCode);  | 
574  | 0  |         } else { | 
575  | 0  |             ConditionalCE32 *cond = getConditionalCE32ForCE32(oldCE32);  | 
576  | 0  |             cond->builtCE32 = Collation::NO_CE32;  | 
577  | 0  |             cond->ce32 = ce32;  | 
578  | 0  |         }  | 
579  | 0  |     } else { | 
580  | 0  |         ConditionalCE32 *cond;  | 
581  | 0  |         if(!isBuilderContextCE32(oldCE32)) { | 
582  |  |             // Replace the simple oldCE32 with a builder context CE32  | 
583  |  |             // pointing to a new ConditionalCE32 list head.  | 
584  | 0  |             int32_t index = addConditionalCE32(UnicodeString((UChar)0), oldCE32, errorCode);  | 
585  | 0  |             if(U_FAILURE(errorCode)) { return; } | 
586  | 0  |             uint32_t contextCE32 = makeBuilderContextCE32(index);  | 
587  | 0  |             utrie2_set32(trie, c, contextCE32, &errorCode);  | 
588  | 0  |             contextChars.add(c);  | 
589  | 0  |             cond = getConditionalCE32(index);  | 
590  | 0  |         } else { | 
591  | 0  |             cond = getConditionalCE32ForCE32(oldCE32);  | 
592  | 0  |             cond->builtCE32 = Collation::NO_CE32;  | 
593  | 0  |         }  | 
594  | 0  |         UnicodeString suffix(s, cLength);  | 
595  | 0  |         UnicodeString context((UChar)prefix.length());  | 
596  | 0  |         context.append(prefix).append(suffix);  | 
597  | 0  |         unsafeBackwardSet.addAll(suffix);  | 
598  | 0  |         for(;;) { | 
599  |  |             // invariant: context > cond->context  | 
600  | 0  |             int32_t next = cond->next;  | 
601  | 0  |             if(next < 0) { | 
602  |  |                 // Append a new ConditionalCE32 after cond.  | 
603  | 0  |                 int32_t index = addConditionalCE32(context, ce32, errorCode);  | 
604  | 0  |                 if(U_FAILURE(errorCode)) { return; } | 
605  | 0  |                 cond->next = index;  | 
606  | 0  |                 break;  | 
607  | 0  |             }  | 
608  | 0  |             ConditionalCE32 *nextCond = getConditionalCE32(next);  | 
609  | 0  |             int8_t cmp = context.compare(nextCond->context);  | 
610  | 0  |             if(cmp < 0) { | 
611  |  |                 // Insert a new ConditionalCE32 between cond and nextCond.  | 
612  | 0  |                 int32_t index = addConditionalCE32(context, ce32, errorCode);  | 
613  | 0  |                 if(U_FAILURE(errorCode)) { return; } | 
614  | 0  |                 cond->next = index;  | 
615  | 0  |                 getConditionalCE32(index)->next = next;  | 
616  | 0  |                 break;  | 
617  | 0  |             } else if(cmp == 0) { | 
618  |  |                 // Same context as before, overwrite its ce32.  | 
619  | 0  |                 nextCond->ce32 = ce32;  | 
620  | 0  |                 break;  | 
621  | 0  |             }  | 
622  | 0  |             cond = nextCond;  | 
623  | 0  |         }  | 
624  | 0  |     }  | 
625  | 0  |     modified = TRUE;  | 
626  | 0  | }  | 
627  |  |  | 
628  |  | uint32_t  | 
629  | 0  | CollationDataBuilder::encodeOneCEAsCE32(int64_t ce) { | 
630  | 0  |     uint32_t p = (uint32_t)(ce >> 32);  | 
631  | 0  |     uint32_t lower32 = (uint32_t)ce;  | 
632  | 0  |     uint32_t t = (uint32_t)(ce & 0xffff);  | 
633  | 0  |     U_ASSERT((t & 0xc000) != 0xc000);  // Impossible case bits 11 mark special CE32s.  | 
634  | 0  |     if((ce & INT64_C(0xffff00ff00ff)) == 0) { | 
635  |  |         // normal form ppppsstt  | 
636  | 0  |         return p | (lower32 >> 16) | (t >> 8);  | 
637  | 0  |     } else if((ce & INT64_C(0xffffffffff)) == Collation::COMMON_SEC_AND_TER_CE) { | 
638  |  |         // long-primary form ppppppC1  | 
639  | 0  |         return Collation::makeLongPrimaryCE32(p);  | 
640  | 0  |     } else if(p == 0 && (t & 0xff) == 0) { | 
641  |  |         // long-secondary form ssssttC2  | 
642  | 0  |         return Collation::makeLongSecondaryCE32(lower32);  | 
643  | 0  |     }  | 
644  | 0  |     return Collation::NO_CE32;  | 
645  | 0  | }  | 
646  |  |  | 
647  |  | uint32_t  | 
648  | 0  | CollationDataBuilder::encodeOneCE(int64_t ce, UErrorCode &errorCode) { | 
649  |  |     // Try to encode one CE as one CE32.  | 
650  | 0  |     uint32_t ce32 = encodeOneCEAsCE32(ce);  | 
651  | 0  |     if(ce32 != Collation::NO_CE32) { return ce32; } | 
652  | 0  |     int32_t index = addCE(ce, errorCode);  | 
653  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
654  | 0  |     if(index > Collation::MAX_INDEX) { | 
655  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
656  | 0  |         return 0;  | 
657  | 0  |     }  | 
658  | 0  |     return Collation::makeCE32FromTagIndexAndLength(Collation::EXPANSION_TAG, index, 1);  | 
659  | 0  | }  | 
660  |  |  | 
661  |  | uint32_t  | 
662  |  | CollationDataBuilder::encodeCEs(const int64_t ces[], int32_t cesLength,  | 
663  | 0  |                                 UErrorCode &errorCode) { | 
664  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
665  | 0  |     if(cesLength < 0 || cesLength > Collation::MAX_EXPANSION_LENGTH) { | 
666  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
667  | 0  |         return 0;  | 
668  | 0  |     }  | 
669  | 0  |     if(trie == NULL || utrie2_isFrozen(trie)) { | 
670  | 0  |         errorCode = U_INVALID_STATE_ERROR;  | 
671  | 0  |         return 0;  | 
672  | 0  |     }  | 
673  | 0  |     if(cesLength == 0) { | 
674  |  |         // Convenience: We cannot map to nothing, but we can map to a completely ignorable CE.  | 
675  |  |         // Do this here so that callers need not do it.  | 
676  | 0  |         return encodeOneCEAsCE32(0);  | 
677  | 0  |     } else if(cesLength == 1) { | 
678  | 0  |         return encodeOneCE(ces[0], errorCode);  | 
679  | 0  |     } else if(cesLength == 2) { | 
680  |  |         // Try to encode two CEs as one CE32.  | 
681  | 0  |         int64_t ce0 = ces[0];  | 
682  | 0  |         int64_t ce1 = ces[1];  | 
683  | 0  |         uint32_t p0 = (uint32_t)(ce0 >> 32);  | 
684  | 0  |         if((ce0 & INT64_C(0xffffffffff00ff)) == Collation::COMMON_SECONDARY_CE &&  | 
685  | 0  |                 (ce1 & INT64_C(0xffffffff00ffffff)) == Collation::COMMON_TERTIARY_CE &&  | 
686  | 0  |                 p0 != 0) { | 
687  |  |             // Latin mini expansion  | 
688  | 0  |             return  | 
689  | 0  |                 p0 |  | 
690  | 0  |                 (((uint32_t)ce0 & 0xff00u) << 8) |  | 
691  | 0  |                 (uint32_t)(ce1 >> 16) |  | 
692  | 0  |                 Collation::SPECIAL_CE32_LOW_BYTE |  | 
693  | 0  |                 Collation::LATIN_EXPANSION_TAG;  | 
694  | 0  |         }  | 
695  | 0  |     }  | 
696  |  |     // Try to encode two or more CEs as CE32s.  | 
697  | 0  |     int32_t newCE32s[Collation::MAX_EXPANSION_LENGTH];  | 
698  | 0  |     for(int32_t i = 0;; ++i) { | 
699  | 0  |         if(i == cesLength) { | 
700  | 0  |             return encodeExpansion32(newCE32s, cesLength, errorCode);  | 
701  | 0  |         }  | 
702  | 0  |         uint32_t ce32 = encodeOneCEAsCE32(ces[i]);  | 
703  | 0  |         if(ce32 == Collation::NO_CE32) { break; } | 
704  | 0  |         newCE32s[i] = (int32_t)ce32;  | 
705  | 0  |     }  | 
706  | 0  |     return encodeExpansion(ces, cesLength, errorCode);  | 
707  | 0  | }  | 
708  |  |  | 
709  |  | uint32_t  | 
710  | 0  | CollationDataBuilder::encodeExpansion(const int64_t ces[], int32_t length, UErrorCode &errorCode) { | 
711  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
712  |  |     // See if this sequence of CEs has already been stored.  | 
713  | 0  |     int64_t first = ces[0];  | 
714  | 0  |     int32_t ce64sMax = ce64s.size() - length;  | 
715  | 0  |     for(int32_t i = 0; i <= ce64sMax; ++i) { | 
716  | 0  |         if(first == ce64s.elementAti(i)) { | 
717  | 0  |             if(i > Collation::MAX_INDEX) { | 
718  | 0  |                 errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
719  | 0  |                 return 0;  | 
720  | 0  |             }  | 
721  | 0  |             for(int32_t j = 1;; ++j) { | 
722  | 0  |                 if(j == length) { | 
723  | 0  |                     return Collation::makeCE32FromTagIndexAndLength(  | 
724  | 0  |                             Collation::EXPANSION_TAG, i, length);  | 
725  | 0  |                 }  | 
726  | 0  |                 if(ce64s.elementAti(i + j) != ces[j]) { break; } | 
727  | 0  |             }  | 
728  | 0  |         }  | 
729  | 0  |     }  | 
730  |  |     // Store the new sequence.  | 
731  | 0  |     int32_t i = ce64s.size();  | 
732  | 0  |     if(i > Collation::MAX_INDEX) { | 
733  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
734  | 0  |         return 0;  | 
735  | 0  |     }  | 
736  | 0  |     for(int32_t j = 0; j < length; ++j) { | 
737  | 0  |         ce64s.addElement(ces[j], errorCode);  | 
738  | 0  |     }  | 
739  | 0  |     return Collation::makeCE32FromTagIndexAndLength(Collation::EXPANSION_TAG, i, length);  | 
740  | 0  | }  | 
741  |  |  | 
742  |  | uint32_t  | 
743  |  | CollationDataBuilder::encodeExpansion32(const int32_t newCE32s[], int32_t length,  | 
744  | 0  |                                         UErrorCode &errorCode) { | 
745  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
746  |  |     // See if this sequence of CE32s has already been stored.  | 
747  | 0  |     int32_t first = newCE32s[0];  | 
748  | 0  |     int32_t ce32sMax = ce32s.size() - length;  | 
749  | 0  |     for(int32_t i = 0; i <= ce32sMax; ++i) { | 
750  | 0  |         if(first == ce32s.elementAti(i)) { | 
751  | 0  |             if(i > Collation::MAX_INDEX) { | 
752  | 0  |                 errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
753  | 0  |                 return 0;  | 
754  | 0  |             }  | 
755  | 0  |             for(int32_t j = 1;; ++j) { | 
756  | 0  |                 if(j == length) { | 
757  | 0  |                     return Collation::makeCE32FromTagIndexAndLength(  | 
758  | 0  |                             Collation::EXPANSION32_TAG, i, length);  | 
759  | 0  |                 }  | 
760  | 0  |                 if(ce32s.elementAti(i + j) != newCE32s[j]) { break; } | 
761  | 0  |             }  | 
762  | 0  |         }  | 
763  | 0  |     }  | 
764  |  |     // Store the new sequence.  | 
765  | 0  |     int32_t i = ce32s.size();  | 
766  | 0  |     if(i > Collation::MAX_INDEX) { | 
767  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
768  | 0  |         return 0;  | 
769  | 0  |     }  | 
770  | 0  |     for(int32_t j = 0; j < length; ++j) { | 
771  | 0  |         ce32s.addElement(newCE32s[j], errorCode);  | 
772  | 0  |     }  | 
773  | 0  |     return Collation::makeCE32FromTagIndexAndLength(Collation::EXPANSION32_TAG, i, length);  | 
774  | 0  | }  | 
775  |  |  | 
/**
 * Copies a mapping for c from the base data into this builder and returns
 * a CE32 that is usable in this builder.
 *
 * Non-special CE32s and long-primary, long-secondary and Latin-expansion
 * CE32s are returned unchanged. Expansions are re-stored via
 * encodeExpansion32()/encodeExpansion(). Prefix and contraction data is
 * either flattened into a list of ConditionalCE32 (withContext) or reduced
 * to its default mapping (!withContext).
 *
 * @param c the code point whose mapping is copied
 * @param ce32 the base CE32 for c; the default branch requires
 *             ce32 == base->getFinalCE32(ce32)
 * @param withContext if FALSE, prefix/contraction mappings are dropped and
 *                    only the default (no-context) mapping is copied
 * @param errorCode set to U_UNSUPPORTED_ERROR for a Hangul CE32;
 *                  may also be set by the helpers called here
 * @return the copied CE32; 0 if errorCode indicated a failure on entry or
 *         a failure is detected while building the prefix list
 */
uint32_t
CollationDataBuilder::copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withContext,
                                       UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return 0; }
    if(!Collation::isSpecialCE32(ce32)) { return ce32; }
    switch(Collation::tagFromCE32(ce32)) {
    case Collation::LONG_PRIMARY_TAG:
    case Collation::LONG_SECONDARY_TAG:
    case Collation::LATIN_EXPANSION_TAG:
        // copy as is
        break;
    case Collation::EXPANSION32_TAG: {
        // Re-store the base CE32s in this builder's own ce32s array.
        const uint32_t *baseCE32s = base->ce32s + Collation::indexFromCE32(ce32);
        int32_t length = Collation::lengthFromCE32(ce32);
        ce32 = encodeExpansion32(
            reinterpret_cast<const int32_t *>(baseCE32s), length, errorCode);
        break;
    }
    case Collation::EXPANSION_TAG: {
        // Re-store the base CEs in this builder's own ce64s array.
        const int64_t *baseCEs = base->ces + Collation::indexFromCE32(ce32);
        int32_t length = Collation::lengthFromCE32(ce32);
        ce32 = encodeExpansion(baseCEs, length, errorCode);
        break;
    }
    case Collation::PREFIX_TAG: {
        // Flatten prefixes and nested suffixes (contractions)
        // into a linear list of ConditionalCE32.
        const UChar *p = base->contexts + Collation::indexFromCE32(ce32);
        ce32 = CollationData::readCE32(p);  // Default if no prefix match.
        if(!withContext) {
            return copyFromBaseCE32(c, ce32, FALSE, errorCode);
        }
        // head is a local dummy list head; only head.next is used afterwards.
        ConditionalCE32 head;
        // Context string for "no prefix, no suffix": a single unit holding prefix length 0.
        UnicodeString context((UChar)0);
        int32_t index;
        if(Collation::isContractionCE32(ce32)) {
            index = copyContractionsFromBaseCE32(context, c, ce32, &head, errorCode);
        } else {
            ce32 = copyFromBaseCE32(c, ce32, TRUE, errorCode);
            head.next = index = addConditionalCE32(context, ce32, errorCode);
        }
        // Check before index is used to look up a ConditionalCE32.
        if(U_FAILURE(errorCode)) { return 0; }
        ConditionalCE32 *cond = getConditionalCE32(index);  // the last ConditionalCE32 so far
        // The default CE32 was read at p; the prefix trie is iterated starting at p + 2.
        UCharsTrie::Iterator prefixes(p + 2, 0, errorCode);
        while(prefixes.next(errorCode)) {
            // Build the context string: prefix-length unit followed by the
            // reversed trie string.
            context = prefixes.getString();
            context.reverse();
            context.insert(0, (UChar)context.length());
            ce32 = (uint32_t)prefixes.getValue();
            if(Collation::isContractionCE32(ce32)) {
                index = copyContractionsFromBaseCE32(context, c, ce32, cond, errorCode);
            } else {
                ce32 = copyFromBaseCE32(c, ce32, TRUE, errorCode);
                cond->next = index = addConditionalCE32(context, ce32, errorCode);
            }
            if(U_FAILURE(errorCode)) { return 0; }
            cond = getConditionalCE32(index);
        }
        // The result refers to the first list element, and c now has context.
        ce32 = makeBuilderContextCE32(head.next);
        contextChars.add(c);
        break;
    }
    case Collation::CONTRACTION_TAG: {
        if(!withContext) {
            const UChar *p = base->contexts + Collation::indexFromCE32(ce32);
            ce32 = CollationData::readCE32(p);  // Default if no suffix match.
            return copyFromBaseCE32(c, ce32, FALSE, errorCode);
        }
        // Same list-building as for prefixes, but with an empty prefix only.
        ConditionalCE32 head;
        UnicodeString context((UChar)0);
        copyContractionsFromBaseCE32(context, c, ce32, &head, errorCode);
        ce32 = makeBuilderContextCE32(head.next);
        contextChars.add(c);
        break;
    }
    case Collation::HANGUL_TAG:
        errorCode = U_UNSUPPORTED_ERROR;  // We forbid tailoring of Hangul syllables.
        break;
    case Collation::OFFSET_TAG:
        ce32 = getCE32FromOffsetCE32(TRUE, c, ce32);
        break;
    case Collation::IMPLICIT_TAG:
        ce32 = encodeOneCE(Collation::unassignedCEFromCodePoint(c), errorCode);
        break;
    default:
        UPRV_UNREACHABLE;  // require ce32 == base->getFinalCE32(ce32)
    }
    return ce32;
}
865  |  |  | 
/**
 * Copies the contraction (suffix) mappings of a base contraction CE32 into
 * ConditionalCE32 entries that are linked after cond.
 *
 * @param context in/out: the prefix-length unit plus prefix on entry;
 *                each suffix is appended temporarily and truncated off again
 * @param c the code point whose mappings are copied
 * @param ce32 the base contraction CE32
 * @param cond the list element after which the new elements are linked
 * @param errorCode may be set by the helpers called here
 * @return the index of the last ConditionalCE32 added;
 *         0 if a failure is detected
 */
int32_t
CollationDataBuilder::copyContractionsFromBaseCE32(UnicodeString &context, UChar32 c, uint32_t ce32,
                                                   ConditionalCE32 *cond, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return 0; }
    const UChar *p = base->contexts + Collation::indexFromCE32(ce32);
    int32_t index;
    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
        // No match on the single code point.
        // We are underneath a prefix, and the default mapping is just
        // a fallback to the mappings for a shorter prefix.
        U_ASSERT(context.length() > 1);
        index = -1;
    } else {
        ce32 = CollationData::readCE32(p);  // Default if no suffix match.
        U_ASSERT(!Collation::isContractionCE32(ce32));
        ce32 = copyFromBaseCE32(c, ce32, TRUE, errorCode);
        cond->next = index = addConditionalCE32(context, ce32, errorCode);
        // Check before index is used to look up a ConditionalCE32.
        if(U_FAILURE(errorCode)) { return 0; }
        cond = getConditionalCE32(index);
    }

    // Remember where the suffix starts so that context can be restored
    // after each trie entry.
    int32_t suffixStart = context.length();
    // The default CE32 is at p; the suffix trie is iterated starting at p + 2.
    UCharsTrie::Iterator suffixes(p + 2, 0, errorCode);
    while(suffixes.next(errorCode)) {
        context.append(suffixes.getString());
        ce32 = copyFromBaseCE32(c, (uint32_t)suffixes.getValue(), TRUE, errorCode);
        cond->next = index = addConditionalCE32(context, ce32, errorCode);
        if(U_FAILURE(errorCode)) { return 0; }
        // No need to update the unsafeBackwardSet because the tailoring set
        // is already a copy of the base set.
        cond = getConditionalCE32(index);
        context.truncate(suffixStart);
    }
    // At least one element is expected to have been added
    // (the default mapping and/or a suffix).
    U_ASSERT(index >= 0);
    return index;
}
902  |  |  | 
903  |  | class CopyHelper { | 
904  |  | public:  | 
905  |  |     CopyHelper(const CollationDataBuilder &s, CollationDataBuilder &d,  | 
906  |  |                const CollationDataBuilder::CEModifier &m, UErrorCode &initialErrorCode)  | 
907  | 0  |             : src(s), dest(d), modifier(m),  | 
908  | 0  |               errorCode(initialErrorCode) {} | 
909  |  |  | 
910  | 0  |     UBool copyRangeCE32(UChar32 start, UChar32 end, uint32_t ce32) { | 
911  | 0  |         ce32 = copyCE32(ce32);  | 
912  | 0  |         utrie2_setRange32(dest.trie, start, end, ce32, TRUE, &errorCode);  | 
913  | 0  |         if(CollationDataBuilder::isBuilderContextCE32(ce32)) { | 
914  | 0  |             dest.contextChars.add(start, end);  | 
915  | 0  |         }  | 
916  | 0  |         return U_SUCCESS(errorCode);  | 
917  | 0  |     }  | 
918  |  |  | 
919  | 0  |     uint32_t copyCE32(uint32_t ce32) { | 
920  | 0  |         if(!Collation::isSpecialCE32(ce32)) { | 
921  | 0  |             int64_t ce = modifier.modifyCE32(ce32);  | 
922  | 0  |             if(ce != Collation::NO_CE) { | 
923  | 0  |                 ce32 = dest.encodeOneCE(ce, errorCode);  | 
924  | 0  |             }  | 
925  | 0  |         } else { | 
926  | 0  |             int32_t tag = Collation::tagFromCE32(ce32);  | 
927  | 0  |             if(tag == Collation::EXPANSION32_TAG) { | 
928  | 0  |                 const uint32_t *srcCE32s = reinterpret_cast<uint32_t *>(src.ce32s.getBuffer());  | 
929  | 0  |                 srcCE32s += Collation::indexFromCE32(ce32);  | 
930  | 0  |                 int32_t length = Collation::lengthFromCE32(ce32);  | 
931  |  |                 // Inspect the source CE32s. Just copy them if none are modified.  | 
932  |  |                 // Otherwise copy to modifiedCEs, with modifications.  | 
933  | 0  |                 UBool isModified = FALSE;  | 
934  | 0  |                 for(int32_t i = 0; i < length; ++i) { | 
935  | 0  |                     ce32 = srcCE32s[i];  | 
936  | 0  |                     int64_t ce;  | 
937  | 0  |                     if(Collation::isSpecialCE32(ce32) ||  | 
938  | 0  |                             (ce = modifier.modifyCE32(ce32)) == Collation::NO_CE) { | 
939  | 0  |                         if(isModified) { | 
940  | 0  |                             modifiedCEs[i] = Collation::ceFromCE32(ce32);  | 
941  | 0  |                         }  | 
942  | 0  |                     } else { | 
943  | 0  |                         if(!isModified) { | 
944  | 0  |                             for(int32_t j = 0; j < i; ++j) { | 
945  | 0  |                                 modifiedCEs[j] = Collation::ceFromCE32(srcCE32s[j]);  | 
946  | 0  |                             }  | 
947  | 0  |                             isModified = TRUE;  | 
948  | 0  |                         }  | 
949  | 0  |                         modifiedCEs[i] = ce;  | 
950  | 0  |                     }  | 
951  | 0  |                 }  | 
952  | 0  |                 if(isModified) { | 
953  | 0  |                     ce32 = dest.encodeCEs(modifiedCEs, length, errorCode);  | 
954  | 0  |                 } else { | 
955  | 0  |                     ce32 = dest.encodeExpansion32(  | 
956  | 0  |                         reinterpret_cast<const int32_t *>(srcCE32s), length, errorCode);  | 
957  | 0  |                 }  | 
958  | 0  |             } else if(tag == Collation::EXPANSION_TAG) { | 
959  | 0  |                 const int64_t *srcCEs = src.ce64s.getBuffer();  | 
960  | 0  |                 srcCEs += Collation::indexFromCE32(ce32);  | 
961  | 0  |                 int32_t length = Collation::lengthFromCE32(ce32);  | 
962  |  |                 // Inspect the source CEs. Just copy them if none are modified.  | 
963  |  |                 // Otherwise copy to modifiedCEs, with modifications.  | 
964  | 0  |                 UBool isModified = FALSE;  | 
965  | 0  |                 for(int32_t i = 0; i < length; ++i) { | 
966  | 0  |                     int64_t srcCE = srcCEs[i];  | 
967  | 0  |                     int64_t ce = modifier.modifyCE(srcCE);  | 
968  | 0  |                     if(ce == Collation::NO_CE) { | 
969  | 0  |                         if(isModified) { | 
970  | 0  |                             modifiedCEs[i] = srcCE;  | 
971  | 0  |                         }  | 
972  | 0  |                     } else { | 
973  | 0  |                         if(!isModified) { | 
974  | 0  |                             for(int32_t j = 0; j < i; ++j) { | 
975  | 0  |                                 modifiedCEs[j] = srcCEs[j];  | 
976  | 0  |                             }  | 
977  | 0  |                             isModified = TRUE;  | 
978  | 0  |                         }  | 
979  | 0  |                         modifiedCEs[i] = ce;  | 
980  | 0  |                     }  | 
981  | 0  |                 }  | 
982  | 0  |                 if(isModified) { | 
983  | 0  |                     ce32 = dest.encodeCEs(modifiedCEs, length, errorCode);  | 
984  | 0  |                 } else { | 
985  | 0  |                     ce32 = dest.encodeExpansion(srcCEs, length, errorCode);  | 
986  | 0  |                 }  | 
987  | 0  |             } else if(tag == Collation::BUILDER_DATA_TAG) { | 
988  |  |                 // Copy the list of ConditionalCE32.  | 
989  | 0  |                 ConditionalCE32 *cond = src.getConditionalCE32ForCE32(ce32);  | 
990  | 0  |                 U_ASSERT(!cond->hasContext());  | 
991  | 0  |                 int32_t destIndex = dest.addConditionalCE32(  | 
992  | 0  |                         cond->context, copyCE32(cond->ce32), errorCode);  | 
993  | 0  |                 ce32 = CollationDataBuilder::makeBuilderContextCE32(destIndex);  | 
994  | 0  |                 while(cond->next >= 0) { | 
995  | 0  |                     cond = src.getConditionalCE32(cond->next);  | 
996  | 0  |                     ConditionalCE32 *prevDestCond = dest.getConditionalCE32(destIndex);  | 
997  | 0  |                     destIndex = dest.addConditionalCE32(  | 
998  | 0  |                             cond->context, copyCE32(cond->ce32), errorCode);  | 
999  | 0  |                     int32_t suffixStart = cond->prefixLength() + 1;  | 
1000  | 0  |                     dest.unsafeBackwardSet.addAll(cond->context.tempSubString(suffixStart));  | 
1001  | 0  |                     prevDestCond->next = destIndex;  | 
1002  | 0  |                 }  | 
1003  | 0  |             } else { | 
1004  |  |                 // Just copy long CEs and Latin mini expansions (and other expected values) as is,  | 
1005  |  |                 // assuming that the modifier would not modify them.  | 
1006  | 0  |                 U_ASSERT(tag == Collation::LONG_PRIMARY_TAG ||  | 
1007  | 0  |                         tag == Collation::LONG_SECONDARY_TAG ||  | 
1008  | 0  |                         tag == Collation::LATIN_EXPANSION_TAG ||  | 
1009  | 0  |                         tag == Collation::HANGUL_TAG);  | 
1010  | 0  |             }  | 
1011  | 0  |         }  | 
1012  | 0  |         return ce32;  | 
1013  | 0  |     }  | 
1014  |  |  | 
1015  |  |     const CollationDataBuilder &src;  | 
1016  |  |     CollationDataBuilder &dest;  | 
1017  |  |     const CollationDataBuilder::CEModifier &modifier;  | 
1018  |  |     int64_t modifiedCEs[Collation::MAX_EXPANSION_LENGTH];  | 
1019  |  |     UErrorCode errorCode;  | 
1020  |  | };  | 
1021  |  |  | 
1022  |  | U_CDECL_BEGIN  | 
1023  |  |  | 
1024  |  | static UBool U_CALLCONV  | 
1025  | 0  | enumRangeForCopy(const void *context, UChar32 start, UChar32 end, uint32_t value) { | 
1026  | 0  |     return  | 
1027  | 0  |         value == Collation::UNASSIGNED_CE32 || value == Collation::FALLBACK_CE32 ||  | 
1028  | 0  |         ((CopyHelper *)context)->copyRangeCE32(start, end, value);  | 
1029  | 0  | }  | 
1030  |  |  | 
1031  |  | U_CDECL_END  | 
1032  |  |  | 
1033  |  | void  | 
1034  |  | CollationDataBuilder::copyFrom(const CollationDataBuilder &src, const CEModifier &modifier,  | 
1035  | 0  |                                UErrorCode &errorCode) { | 
1036  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1037  | 0  |     if(trie == NULL || utrie2_isFrozen(trie)) { | 
1038  | 0  |         errorCode = U_INVALID_STATE_ERROR;  | 
1039  | 0  |         return;  | 
1040  | 0  |     }  | 
1041  | 0  |     CopyHelper helper(src, *this, modifier, errorCode);  | 
1042  | 0  |     utrie2_enum(src.trie, NULL, enumRangeForCopy, &helper);  | 
1043  | 0  |     errorCode = helper.errorCode;  | 
1044  |  |     // Update the contextChars and the unsafeBackwardSet while copying,  | 
1045  |  |     // in case a character had conditional mappings in the source builder  | 
1046  |  |     // and they were removed later.  | 
1047  | 0  |     modified |= src.modified;  | 
1048  | 0  | }  | 
1049  |  |  | 
1050  |  | void  | 
1051  | 0  | CollationDataBuilder::optimize(const UnicodeSet &set, UErrorCode &errorCode) { | 
1052  | 0  |     if(U_FAILURE(errorCode) || set.isEmpty()) { return; } | 
1053  | 0  |     UnicodeSetIterator iter(set);  | 
1054  | 0  |     while(iter.next() && !iter.isString()) { | 
1055  | 0  |         UChar32 c = iter.getCodepoint();  | 
1056  | 0  |         uint32_t ce32 = utrie2_get32(trie, c);  | 
1057  | 0  |         if(ce32 == Collation::FALLBACK_CE32) { | 
1058  | 0  |             ce32 = base->getFinalCE32(base->getCE32(c));  | 
1059  | 0  |             ce32 = copyFromBaseCE32(c, ce32, TRUE, errorCode);  | 
1060  | 0  |             utrie2_set32(trie, c, ce32, &errorCode);  | 
1061  | 0  |         }  | 
1062  | 0  |     }  | 
1063  | 0  |     modified = TRUE;  | 
1064  | 0  | }  | 
1065  |  |  | 
1066  |  | void  | 
1067  | 0  | CollationDataBuilder::suppressContractions(const UnicodeSet &set, UErrorCode &errorCode) { | 
1068  | 0  |     if(U_FAILURE(errorCode) || set.isEmpty()) { return; } | 
1069  | 0  |     UnicodeSetIterator iter(set);  | 
1070  | 0  |     while(iter.next() && !iter.isString()) { | 
1071  | 0  |         UChar32 c = iter.getCodepoint();  | 
1072  | 0  |         uint32_t ce32 = utrie2_get32(trie, c);  | 
1073  | 0  |         if(ce32 == Collation::FALLBACK_CE32) { | 
1074  | 0  |             ce32 = base->getFinalCE32(base->getCE32(c));  | 
1075  | 0  |             if(Collation::ce32HasContext(ce32)) { | 
1076  | 0  |                 ce32 = copyFromBaseCE32(c, ce32, FALSE /* without context */, errorCode);  | 
1077  | 0  |                 utrie2_set32(trie, c, ce32, &errorCode);  | 
1078  | 0  |             }  | 
1079  | 0  |         } else if(isBuilderContextCE32(ce32)) { | 
1080  | 0  |             ce32 = getConditionalCE32ForCE32(ce32)->ce32;  | 
1081  |  |             // Simply abandon the list of ConditionalCE32.  | 
1082  |  |             // The caller will copy this builder in the end,  | 
1083  |  |             // eliminating unreachable data.  | 
1084  | 0  |             utrie2_set32(trie, c, ce32, &errorCode);  | 
1085  | 0  |             contextChars.remove(c);  | 
1086  | 0  |         }  | 
1087  | 0  |     }  | 
1088  | 0  |     modified = TRUE;  | 
1089  | 0  | }  | 
1090  |  |  | 
1091  |  | UBool  | 
1092  | 0  | CollationDataBuilder::getJamoCE32s(uint32_t jamoCE32s[], UErrorCode &errorCode) { | 
1093  | 0  |     if(U_FAILURE(errorCode)) { return FALSE; } | 
1094  | 0  |     UBool anyJamoAssigned = base == NULL;  // always set jamoCE32s in the base data  | 
1095  | 0  |     UBool needToCopyFromBase = FALSE;  | 
1096  | 0  |     for(int32_t j = 0; j < CollationData::JAMO_CE32S_LENGTH; ++j) {  // Count across Jamo types. | 
1097  | 0  |         UChar32 jamo = jamoCpFromIndex(j);  | 
1098  | 0  |         UBool fromBase = FALSE;  | 
1099  | 0  |         uint32_t ce32 = utrie2_get32(trie, jamo);  | 
1100  | 0  |         anyJamoAssigned |= Collation::isAssignedCE32(ce32);  | 
1101  |  |         // TODO: Try to prevent [optimize [Jamo]] from counting as anyJamoAssigned.  | 
1102  |  |         // (As of CLDR 24 [2013] the Korean tailoring does not optimize conjoining Jamo.)  | 
1103  | 0  |         if(ce32 == Collation::FALLBACK_CE32) { | 
1104  | 0  |             fromBase = TRUE;  | 
1105  | 0  |             ce32 = base->getCE32(jamo);  | 
1106  | 0  |         }  | 
1107  | 0  |         if(Collation::isSpecialCE32(ce32)) { | 
1108  | 0  |             switch(Collation::tagFromCE32(ce32)) { | 
1109  | 0  |             case Collation::LONG_PRIMARY_TAG:  | 
1110  | 0  |             case Collation::LONG_SECONDARY_TAG:  | 
1111  | 0  |             case Collation::LATIN_EXPANSION_TAG:  | 
1112  |  |                 // Copy the ce32 as-is.  | 
1113  | 0  |                 break;  | 
1114  | 0  |             case Collation::EXPANSION32_TAG:  | 
1115  | 0  |             case Collation::EXPANSION_TAG:  | 
1116  | 0  |             case Collation::PREFIX_TAG:  | 
1117  | 0  |             case Collation::CONTRACTION_TAG:  | 
1118  | 0  |                 if(fromBase) { | 
1119  |  |                     // Defer copying until we know if anyJamoAssigned.  | 
1120  | 0  |                     ce32 = Collation::FALLBACK_CE32;  | 
1121  | 0  |                     needToCopyFromBase = TRUE;  | 
1122  | 0  |                 }  | 
1123  | 0  |                 break;  | 
1124  | 0  |             case Collation::IMPLICIT_TAG:  | 
1125  |  |                 // An unassigned Jamo should only occur in tests with incomplete bases.  | 
1126  | 0  |                 U_ASSERT(fromBase);  | 
1127  | 0  |                 ce32 = Collation::FALLBACK_CE32;  | 
1128  | 0  |                 needToCopyFromBase = TRUE;  | 
1129  | 0  |                 break;  | 
1130  | 0  |             case Collation::OFFSET_TAG:  | 
1131  | 0  |                 ce32 = getCE32FromOffsetCE32(fromBase, jamo, ce32);  | 
1132  | 0  |                 break;  | 
1133  | 0  |             case Collation::FALLBACK_TAG:  | 
1134  | 0  |             case Collation::RESERVED_TAG_3:  | 
1135  | 0  |             case Collation::BUILDER_DATA_TAG:  | 
1136  | 0  |             case Collation::DIGIT_TAG:  | 
1137  | 0  |             case Collation::U0000_TAG:  | 
1138  | 0  |             case Collation::HANGUL_TAG:  | 
1139  | 0  |             case Collation::LEAD_SURROGATE_TAG:  | 
1140  | 0  |                 errorCode = U_INTERNAL_PROGRAM_ERROR;  | 
1141  | 0  |                 return FALSE;  | 
1142  | 0  |             }  | 
1143  | 0  |         }  | 
1144  | 0  |         jamoCE32s[j] = ce32;  | 
1145  | 0  |     }  | 
1146  | 0  |     if(anyJamoAssigned && needToCopyFromBase) { | 
1147  | 0  |         for(int32_t j = 0; j < CollationData::JAMO_CE32S_LENGTH; ++j) { | 
1148  | 0  |             if(jamoCE32s[j] == Collation::FALLBACK_CE32) { | 
1149  | 0  |                 UChar32 jamo = jamoCpFromIndex(j);  | 
1150  | 0  |                 jamoCE32s[j] = copyFromBaseCE32(jamo, base->getCE32(jamo),  | 
1151  | 0  |                                                 /*withContext=*/ TRUE, errorCode);  | 
1152  | 0  |             }  | 
1153  | 0  |         }  | 
1154  | 0  |     }  | 
1155  | 0  |     return anyJamoAssigned && U_SUCCESS(errorCode);  | 
1156  | 0  | }  | 
1157  |  |  | 
1158  |  | void  | 
1159  | 0  | CollationDataBuilder::setDigitTags(UErrorCode &errorCode) { | 
1160  | 0  |     UnicodeSet digits(UNICODE_STRING_SIMPLE("[:Nd:]"), errorCode); | 
1161  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1162  | 0  |     UnicodeSetIterator iter(digits);  | 
1163  | 0  |     while(iter.next()) { | 
1164  | 0  |         U_ASSERT(!iter.isString());  | 
1165  | 0  |         UChar32 c = iter.getCodepoint();  | 
1166  | 0  |         uint32_t ce32 = utrie2_get32(trie, c);  | 
1167  | 0  |         if(ce32 != Collation::FALLBACK_CE32 && ce32 != Collation::UNASSIGNED_CE32) { | 
1168  | 0  |             int32_t index = addCE32(ce32, errorCode);  | 
1169  | 0  |             if(U_FAILURE(errorCode)) { return; } | 
1170  | 0  |             if(index > Collation::MAX_INDEX) { | 
1171  | 0  |                 errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
1172  | 0  |                 return;  | 
1173  | 0  |             }  | 
1174  | 0  |             ce32 = Collation::makeCE32FromTagIndexAndLength(  | 
1175  | 0  |                     Collation::DIGIT_TAG, index, u_charDigitValue(c));  | 
1176  | 0  |             utrie2_set32(trie, c, ce32, &errorCode);  | 
1177  | 0  |         }  | 
1178  | 0  |     }  | 
1179  | 0  | }  | 
1180  |  |  | 
1181  |  | U_CDECL_BEGIN  | 
1182  |  |  | 
1183  |  | static UBool U_CALLCONV  | 
1184  | 0  | enumRangeLeadValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) { | 
1185  | 0  |     int32_t *pValue = (int32_t *)context;  | 
1186  | 0  |     if(value == Collation::UNASSIGNED_CE32) { | 
1187  | 0  |         value = Collation::LEAD_ALL_UNASSIGNED;  | 
1188  | 0  |     } else if(value == Collation::FALLBACK_CE32) { | 
1189  | 0  |         value = Collation::LEAD_ALL_FALLBACK;  | 
1190  | 0  |     } else { | 
1191  | 0  |         *pValue = Collation::LEAD_MIXED;  | 
1192  | 0  |         return FALSE;  | 
1193  | 0  |     }  | 
1194  | 0  |     if(*pValue < 0) { | 
1195  | 0  |         *pValue = (int32_t)value;  | 
1196  | 0  |     } else if(*pValue != (int32_t)value) { | 
1197  | 0  |         *pValue = Collation::LEAD_MIXED;  | 
1198  | 0  |         return FALSE;  | 
1199  | 0  |     }  | 
1200  | 0  |     return TRUE;  | 
1201  | 0  | }  | 
1202  |  |  | 
1203  |  | U_CDECL_END  | 
1204  |  |  | 
1205  |  | void  | 
1206  | 0  | CollationDataBuilder::setLeadSurrogates(UErrorCode &errorCode) { | 
1207  | 0  |     for(UChar lead = 0xd800; lead < 0xdc00; ++lead) { | 
1208  | 0  |         int32_t value = -1;  | 
1209  | 0  |         utrie2_enumForLeadSurrogate(trie, lead, NULL, enumRangeLeadValue, &value);  | 
1210  | 0  |         utrie2_set32ForLeadSurrogateCodeUnit(  | 
1211  | 0  |             trie, lead,  | 
1212  | 0  |             Collation::makeCE32FromTagAndIndex(Collation::LEAD_SURROGATE_TAG, 0) | (uint32_t)value,  | 
1213  | 0  |             &errorCode);  | 
1214  | 0  |     }  | 
1215  | 0  | }  | 
1216  |  |  | 
1217  |  | void  | 
1218  | 0  | CollationDataBuilder::build(CollationData &data, UErrorCode &errorCode) { | 
1219  | 0  |     buildMappings(data, errorCode);  | 
1220  | 0  |     if(base != NULL) { | 
1221  | 0  |         data.numericPrimary = base->numericPrimary;  | 
1222  | 0  |         data.compressibleBytes = base->compressibleBytes;  | 
1223  | 0  |         data.numScripts = base->numScripts;  | 
1224  | 0  |         data.scriptsIndex = base->scriptsIndex;  | 
1225  | 0  |         data.scriptStarts = base->scriptStarts;  | 
1226  | 0  |         data.scriptStartsLength = base->scriptStartsLength;  | 
1227  | 0  |     }  | 
1228  | 0  |     buildFastLatinTable(data, errorCode);  | 
1229  | 0  | }  | 
1230  |  |  | 
1231  |  | void  | 
1232  | 0  | CollationDataBuilder::buildMappings(CollationData &data, UErrorCode &errorCode) { | 
1233  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1234  | 0  |     if(trie == NULL || utrie2_isFrozen(trie)) { | 
1235  | 0  |         errorCode = U_INVALID_STATE_ERROR;  | 
1236  | 0  |         return;  | 
1237  | 0  |     }  | 
1238  |  |  | 
1239  | 0  |     buildContexts(errorCode);  | 
1240  |  | 
  | 
1241  | 0  |     uint32_t jamoCE32s[CollationData::JAMO_CE32S_LENGTH];  | 
1242  | 0  |     int32_t jamoIndex = -1;  | 
1243  | 0  |     if(getJamoCE32s(jamoCE32s, errorCode)) { | 
1244  | 0  |         jamoIndex = ce32s.size();  | 
1245  | 0  |         for(int32_t i = 0; i < CollationData::JAMO_CE32S_LENGTH; ++i) { | 
1246  | 0  |             ce32s.addElement((int32_t)jamoCE32s[i], errorCode);  | 
1247  | 0  |         }  | 
1248  |  |         // Small optimization: Use a bit in the Hangul ce32  | 
1249  |  |         // to indicate that none of the Jamo CE32s are isSpecialCE32()  | 
1250  |  |         // (as it should be in the root collator).  | 
1251  |  |         // It allows CollationIterator to avoid recursive function calls and per-Jamo tests.  | 
1252  |  |         // In order to still have good trie compression and keep this code simple,  | 
1253  |  |         // we only set this flag if a whole block of 588 Hangul syllables starting with  | 
1254  |  |         // a common leading consonant (Jamo L) has this property.  | 
1255  | 0  |         UBool isAnyJamoVTSpecial = FALSE;  | 
1256  | 0  |         for(int32_t i = Hangul::JAMO_L_COUNT; i < CollationData::JAMO_CE32S_LENGTH; ++i) { | 
1257  | 0  |             if(Collation::isSpecialCE32(jamoCE32s[i])) { | 
1258  | 0  |                 isAnyJamoVTSpecial = TRUE;  | 
1259  | 0  |                 break;  | 
1260  | 0  |             }  | 
1261  | 0  |         }  | 
1262  | 0  |         uint32_t hangulCE32 = Collation::makeCE32FromTagAndIndex(Collation::HANGUL_TAG, 0);  | 
1263  | 0  |         UChar32 c = Hangul::HANGUL_BASE;  | 
1264  | 0  |         for(int32_t i = 0; i < Hangul::JAMO_L_COUNT; ++i) {  // iterate over the Jamo L | 
1265  | 0  |             uint32_t ce32 = hangulCE32;  | 
1266  | 0  |             if(!isAnyJamoVTSpecial && !Collation::isSpecialCE32(jamoCE32s[i])) { | 
1267  | 0  |                 ce32 |= Collation::HANGUL_NO_SPECIAL_JAMO;  | 
1268  | 0  |             }  | 
1269  | 0  |             UChar32 limit = c + Hangul::JAMO_VT_COUNT;  | 
1270  | 0  |             utrie2_setRange32(trie, c, limit - 1, ce32, TRUE, &errorCode);  | 
1271  | 0  |             c = limit;  | 
1272  | 0  |         }  | 
1273  | 0  |     } else { | 
1274  |  |         // Copy the Hangul CE32s from the base in blocks per Jamo L,  | 
1275  |  |         // assuming that HANGUL_NO_SPECIAL_JAMO is set or not set for whole blocks.  | 
1276  | 0  |         for(UChar32 c = Hangul::HANGUL_BASE; c < Hangul::HANGUL_LIMIT;) { | 
1277  | 0  |             uint32_t ce32 = base->getCE32(c);  | 
1278  | 0  |             U_ASSERT(Collation::hasCE32Tag(ce32, Collation::HANGUL_TAG));  | 
1279  | 0  |             UChar32 limit = c + Hangul::JAMO_VT_COUNT;  | 
1280  | 0  |             utrie2_setRange32(trie, c, limit - 1, ce32, TRUE, &errorCode);  | 
1281  | 0  |             c = limit;  | 
1282  | 0  |         }  | 
1283  | 0  |     }  | 
1284  |  | 
  | 
1285  | 0  |     setDigitTags(errorCode);  | 
1286  | 0  |     setLeadSurrogates(errorCode);  | 
1287  |  |  | 
1288  |  |     // For U+0000, move its normal ce32 into CE32s[0] and set U0000_TAG.  | 
1289  | 0  |     ce32s.setElementAt((int32_t)utrie2_get32(trie, 0), 0);  | 
1290  | 0  |     utrie2_set32(trie, 0, Collation::makeCE32FromTagAndIndex(Collation::U0000_TAG, 0), &errorCode);  | 
1291  |  | 
  | 
1292  | 0  |     utrie2_freeze(trie, UTRIE2_32_VALUE_BITS, &errorCode);  | 
1293  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1294  |  |  | 
1295  |  |     // Mark each lead surrogate as "unsafe"  | 
1296  |  |     // if any of its 1024 associated supplementary code points is "unsafe".  | 
1297  | 0  |     UChar32 c = 0x10000;  | 
1298  | 0  |     for(UChar lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) { | 
1299  | 0  |         if(unsafeBackwardSet.containsSome(c, c + 0x3ff)) { | 
1300  | 0  |             unsafeBackwardSet.add(lead);  | 
1301  | 0  |         }  | 
1302  | 0  |     }  | 
1303  | 0  |     unsafeBackwardSet.freeze();  | 
1304  |  | 
  | 
1305  | 0  |     data.trie = trie;  | 
1306  | 0  |     data.ce32s = reinterpret_cast<const uint32_t *>(ce32s.getBuffer());  | 
1307  | 0  |     data.ces = ce64s.getBuffer();  | 
1308  | 0  |     data.contexts = contexts.getBuffer();  | 
1309  |  | 
  | 
1310  | 0  |     data.ce32sLength = ce32s.size();  | 
1311  | 0  |     data.cesLength = ce64s.size();  | 
1312  | 0  |     data.contextsLength = contexts.length();  | 
1313  |  | 
  | 
1314  | 0  |     data.base = base;  | 
1315  | 0  |     if(jamoIndex >= 0) { | 
1316  | 0  |         data.jamoCE32s = data.ce32s + jamoIndex;  | 
1317  | 0  |     } else { | 
1318  | 0  |         data.jamoCE32s = base->jamoCE32s;  | 
1319  | 0  |     }  | 
1320  | 0  |     data.unsafeBackwardSet = &unsafeBackwardSet;  | 
1321  | 0  | }  | 
1322  |  |  | 
1323  |  | void  | 
1324  | 0  | CollationDataBuilder::clearContexts() { | 
1325  | 0  |     contexts.remove();  | 
1326  | 0  |     UnicodeSetIterator iter(contextChars);  | 
1327  | 0  |     while(iter.next()) { | 
1328  | 0  |         U_ASSERT(!iter.isString());  | 
1329  | 0  |         uint32_t ce32 = utrie2_get32(trie, iter.getCodepoint());  | 
1330  | 0  |         U_ASSERT(isBuilderContextCE32(ce32));  | 
1331  | 0  |         getConditionalCE32ForCE32(ce32)->builtCE32 = Collation::NO_CE32;  | 
1332  | 0  |     }  | 
1333  | 0  | }  | 
1334  |  |  | 
1335  |  | void  | 
1336  | 0  | CollationDataBuilder::buildContexts(UErrorCode &errorCode) { | 
1337  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1338  |  |     // Ignore abandoned lists and the cached builtCE32,  | 
1339  |  |     // and build all contexts from scratch.  | 
1340  | 0  |     contexts.remove();  | 
1341  | 0  |     UnicodeSetIterator iter(contextChars);  | 
1342  | 0  |     while(U_SUCCESS(errorCode) && iter.next()) { | 
1343  | 0  |         U_ASSERT(!iter.isString());  | 
1344  | 0  |         UChar32 c = iter.getCodepoint();  | 
1345  | 0  |         uint32_t ce32 = utrie2_get32(trie, c);  | 
1346  | 0  |         if(!isBuilderContextCE32(ce32)) { | 
1347  |  |             // Impossible: No context data for c in contextChars.  | 
1348  | 0  |             errorCode = U_INTERNAL_PROGRAM_ERROR;  | 
1349  | 0  |             return;  | 
1350  | 0  |         }  | 
1351  | 0  |         ConditionalCE32 *cond = getConditionalCE32ForCE32(ce32);  | 
1352  | 0  |         ce32 = buildContext(cond, errorCode);  | 
1353  | 0  |         utrie2_set32(trie, c, ce32, &errorCode);  | 
1354  | 0  |     }  | 
1355  | 0  | }  | 
1356  |  |  | 
1357  |  | uint32_t  | 
1358  | 0  | CollationDataBuilder::buildContext(ConditionalCE32 *head, UErrorCode &errorCode) { | 
1359  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1360  |  |     // The list head must have no context.  | 
1361  | 0  |     U_ASSERT(!head->hasContext());  | 
1362  |  |     // The list head must be followed by one or more nodes that all do have context.  | 
1363  | 0  |     U_ASSERT(head->next >= 0);  | 
1364  | 0  |     UCharsTrieBuilder prefixBuilder(errorCode);  | 
1365  | 0  |     UCharsTrieBuilder contractionBuilder(errorCode);  | 
1366  | 0  |     for(ConditionalCE32 *cond = head;; cond = getConditionalCE32(cond->next)) { | 
1367  |  |         // After the list head, the prefix or suffix can be empty, but not both.  | 
1368  | 0  |         U_ASSERT(cond == head || cond->hasContext());  | 
1369  | 0  |         int32_t prefixLength = cond->prefixLength();  | 
1370  | 0  |         UnicodeString prefix(cond->context, 0, prefixLength + 1);  | 
1371  |  |         // Collect all contraction suffixes for one prefix.  | 
1372  | 0  |         ConditionalCE32 *firstCond = cond;  | 
1373  | 0  |         ConditionalCE32 *lastCond = cond;  | 
1374  | 0  |         while(cond->next >= 0 &&  | 
1375  | 0  |                 (cond = getConditionalCE32(cond->next))->context.startsWith(prefix)) { | 
1376  | 0  |             lastCond = cond;  | 
1377  | 0  |         }  | 
1378  | 0  |         uint32_t ce32;  | 
1379  | 0  |         int32_t suffixStart = prefixLength + 1;  // == prefix.length()  | 
1380  | 0  |         if(lastCond->context.length() == suffixStart) { | 
1381  |  |             // One prefix without contraction suffix.  | 
1382  | 0  |             U_ASSERT(firstCond == lastCond);  | 
1383  | 0  |             ce32 = lastCond->ce32;  | 
1384  | 0  |             cond = lastCond;  | 
1385  | 0  |         } else { | 
1386  |  |             // Build the contractions trie.  | 
1387  | 0  |             contractionBuilder.clear();  | 
1388  |  |             // Entry for an empty suffix, to be stored before the trie.  | 
1389  | 0  |             uint32_t emptySuffixCE32 = 0;  | 
1390  | 0  |             uint32_t flags = 0;  | 
1391  | 0  |             if(firstCond->context.length() == suffixStart) { | 
1392  |  |                 // There is a mapping for the prefix and the single character c. (p|c)  | 
1393  |  |                 // If no other suffix matches, then we return this value.  | 
1394  | 0  |                 emptySuffixCE32 = firstCond->ce32;  | 
1395  | 0  |                 cond = getConditionalCE32(firstCond->next);  | 
1396  | 0  |             } else { | 
1397  |  |                 // There is no mapping for the prefix and just the single character.  | 
1398  |  |                 // (There is no p|c, only p|cd, p|ce etc.)  | 
1399  | 0  |                 flags |= Collation::CONTRACT_SINGLE_CP_NO_MATCH;  | 
1400  |  |                 // When the prefix matches but none of the prefix-specific suffixes,  | 
1401  |  |                 // then we fall back to the mappings with the next-longest prefix,  | 
1402  |  |                 // and ultimately to mappings with no prefix.  | 
1403  |  |                 // Each fallback might be another set of contractions.  | 
1404  |  |                 // For example, if there are mappings for ch, p|cd, p|ce, but not for p|c,  | 
1405  |  |                 // then in text "pch" we find the ch contraction.  | 
1406  | 0  |                 for(cond = head;; cond = getConditionalCE32(cond->next)) { | 
1407  | 0  |                     int32_t length = cond->prefixLength();  | 
1408  | 0  |                     if(length == prefixLength) { break; } | 
1409  | 0  |                     if(cond->defaultCE32 != Collation::NO_CE32 &&  | 
1410  | 0  |                             (length==0 || prefix.endsWith(cond->context, 1, length))) { | 
1411  | 0  |                         emptySuffixCE32 = cond->defaultCE32;  | 
1412  | 0  |                     }  | 
1413  | 0  |                 }  | 
1414  | 0  |                 cond = firstCond;  | 
1415  | 0  |             }  | 
1416  |  |             // Optimization: Set a flag when  | 
1417  |  |             // the first character of every contraction suffix has lccc!=0.  | 
1418  |  |             // Short-circuits contraction matching when a normal letter follows.  | 
1419  | 0  |             flags |= Collation::CONTRACT_NEXT_CCC;  | 
1420  |  |             // Add all of the non-empty suffixes into the contraction trie.  | 
1421  | 0  |             for(;;) { | 
1422  | 0  |                 UnicodeString suffix(cond->context, suffixStart);  | 
1423  | 0  |                 uint16_t fcd16 = nfcImpl.getFCD16(suffix.char32At(0));  | 
1424  | 0  |                 if(fcd16 <= 0xff) { | 
1425  | 0  |                     flags &= ~Collation::CONTRACT_NEXT_CCC;  | 
1426  | 0  |                 }  | 
1427  | 0  |                 fcd16 = nfcImpl.getFCD16(suffix.char32At(suffix.length() - 1));  | 
1428  | 0  |                 if(fcd16 > 0xff) { | 
1429  |  |                     // The last suffix character has lccc!=0, allowing for discontiguous contractions.  | 
1430  | 0  |                     flags |= Collation::CONTRACT_TRAILING_CCC;  | 
1431  | 0  |                 }  | 
1432  | 0  |                 contractionBuilder.add(suffix, (int32_t)cond->ce32, errorCode);  | 
1433  | 0  |                 if(cond == lastCond) { break; } | 
1434  | 0  |                 cond = getConditionalCE32(cond->next);  | 
1435  | 0  |             }  | 
1436  | 0  |             int32_t index = addContextTrie(emptySuffixCE32, contractionBuilder, errorCode);  | 
1437  | 0  |             if(U_FAILURE(errorCode)) { return 0; } | 
1438  | 0  |             if(index > Collation::MAX_INDEX) { | 
1439  | 0  |                 errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
1440  | 0  |                 return 0;  | 
1441  | 0  |             }  | 
1442  | 0  |             ce32 = Collation::makeCE32FromTagAndIndex(Collation::CONTRACTION_TAG, index) | flags;  | 
1443  | 0  |         }  | 
1444  | 0  |         U_ASSERT(cond == lastCond);  | 
1445  | 0  |         firstCond->defaultCE32 = ce32;  | 
1446  | 0  |         if(prefixLength == 0) { | 
1447  | 0  |             if(cond->next < 0) { | 
1448  |  |                 // No non-empty prefixes, only contractions.  | 
1449  | 0  |                 return ce32;  | 
1450  | 0  |             }  | 
1451  | 0  |         } else { | 
1452  | 0  |             prefix.remove(0, 1);  // Remove the length unit.  | 
1453  | 0  |             prefix.reverse();  | 
1454  | 0  |             prefixBuilder.add(prefix, (int32_t)ce32, errorCode);  | 
1455  | 0  |             if(cond->next < 0) { break; } | 
1456  | 0  |         }  | 
1457  | 0  |     }  | 
1458  | 0  |     U_ASSERT(head->defaultCE32 != Collation::NO_CE32);  | 
1459  | 0  |     int32_t index = addContextTrie(head->defaultCE32, prefixBuilder, errorCode);  | 
1460  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1461  | 0  |     if(index > Collation::MAX_INDEX) { | 
1462  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
1463  | 0  |         return 0;  | 
1464  | 0  |     }  | 
1465  | 0  |     return Collation::makeCE32FromTagAndIndex(Collation::PREFIX_TAG, index);  | 
1466  | 0  | }  | 
1467  |  |  | 
1468  |  | int32_t  | 
1469  |  | CollationDataBuilder::addContextTrie(uint32_t defaultCE32, UCharsTrieBuilder &trieBuilder,  | 
1470  | 0  |                                      UErrorCode &errorCode) { | 
1471  | 0  |     UnicodeString context;  | 
1472  | 0  |     context.append((UChar)(defaultCE32 >> 16)).append((UChar)defaultCE32);  | 
1473  | 0  |     UnicodeString trieString;  | 
1474  | 0  |     context.append(trieBuilder.buildUnicodeString(USTRINGTRIE_BUILD_SMALL, trieString, errorCode));  | 
1475  | 0  |     if(U_FAILURE(errorCode)) { return -1; } | 
1476  | 0  |     int32_t index = contexts.indexOf(context);  | 
1477  | 0  |     if(index < 0) { | 
1478  | 0  |         index = contexts.length();  | 
1479  | 0  |         contexts.append(context);  | 
1480  | 0  |     }  | 
1481  | 0  |     return index;  | 
1482  | 0  | }  | 
1483  |  |  | 
1484  |  | void  | 
1485  | 0  | CollationDataBuilder::buildFastLatinTable(CollationData &data, UErrorCode &errorCode) { | 
1486  | 0  |     if(U_FAILURE(errorCode) || !fastLatinEnabled) { return; } | 
1487  |  |  | 
1488  | 0  |     delete fastLatinBuilder;  | 
1489  | 0  |     fastLatinBuilder = new CollationFastLatinBuilder(errorCode);  | 
1490  | 0  |     if(fastLatinBuilder == NULL) { | 
1491  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1492  | 0  |         return;  | 
1493  | 0  |     }  | 
1494  | 0  |     if(fastLatinBuilder->forData(data, errorCode)) { | 
1495  | 0  |         const uint16_t *table = fastLatinBuilder->getTable();  | 
1496  | 0  |         int32_t length = fastLatinBuilder->lengthOfTable();  | 
1497  | 0  |         if(base != NULL && length == base->fastLatinTableLength &&  | 
1498  | 0  |                 uprv_memcmp(table, base->fastLatinTable, length * 2) == 0) { | 
1499  |  |             // Same fast Latin table as in the base, use that one instead.  | 
1500  | 0  |             delete fastLatinBuilder;  | 
1501  | 0  |             fastLatinBuilder = NULL;  | 
1502  | 0  |             table = base->fastLatinTable;  | 
1503  | 0  |         }  | 
1504  | 0  |         data.fastLatinTable = table;  | 
1505  | 0  |         data.fastLatinTableLength = length;  | 
1506  | 0  |     } else { | 
1507  | 0  |         delete fastLatinBuilder;  | 
1508  | 0  |         fastLatinBuilder = NULL;  | 
1509  | 0  |     }  | 
1510  | 0  | }  | 
1511  |  |  | 
1512  |  | int32_t  | 
1513  | 0  | CollationDataBuilder::getCEs(const UnicodeString &s, int64_t ces[], int32_t cesLength) { | 
1514  | 0  |     return getCEs(s, 0, ces, cesLength);  | 
1515  | 0  | }  | 
1516  |  |  | 
1517  |  | int32_t  | 
1518  |  | CollationDataBuilder::getCEs(const UnicodeString &prefix, const UnicodeString &s,  | 
1519  | 0  |                              int64_t ces[], int32_t cesLength) { | 
1520  | 0  |     int32_t prefixLength = prefix.length();  | 
1521  | 0  |     if(prefixLength == 0) { | 
1522  | 0  |         return getCEs(s, 0, ces, cesLength);  | 
1523  | 0  |     } else { | 
1524  | 0  |         return getCEs(prefix + s, prefixLength, ces, cesLength);  | 
1525  | 0  |     }  | 
1526  | 0  | }  | 
1527  |  |  | 
1528  |  | int32_t  | 
1529  |  | CollationDataBuilder::getCEs(const UnicodeString &s, int32_t start,  | 
1530  | 0  |                              int64_t ces[], int32_t cesLength) { | 
1531  | 0  |     if(collIter == NULL) { | 
1532  | 0  |         collIter = new DataBuilderCollationIterator(*this);  | 
1533  | 0  |         if(collIter == NULL) { return 0; } | 
1534  | 0  |     }  | 
1535  | 0  |     return collIter->fetchCEs(s, start, ces, cesLength);  | 
1536  | 0  | }  | 
1537  |  |  | 
1538  |  | U_NAMESPACE_END  | 
1539  |  |  | 
1540  |  | #endif  // !UCONFIG_NO_COLLATION  |