/src/icu/source/common/uset.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2002-2011, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  uset.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2002mar07  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | *  | 
18  |  | *   There are functions to efficiently serialize a USet into an array of uint16_t  | 
19  |  | *   and functions to use such a serialized form efficiently without  | 
20  |  | *   instantiating a new USet.  | 
21  |  | */  | 
22  |  |  | 
23  |  | #include "unicode/utypes.h"  | 
24  |  | #include "unicode/uobject.h"  | 
25  |  | #include "unicode/uset.h"  | 
26  |  | #include "unicode/uniset.h"  | 
27  |  | #include "cmemory.h"  | 
28  |  | #include "unicode/ustring.h"  | 
29  |  | #include "unicode/parsepos.h"  | 
30  |  |  | 
31  |  | U_NAMESPACE_USE  | 
32  |  |  | 
33  |  | U_CAPI USet* U_EXPORT2  | 
34  | 0  | uset_openEmpty() { | 
35  | 0  |     return (USet*) new UnicodeSet();  | 
36  | 0  | }  | 
37  |  |  | 
38  |  | U_CAPI USet* U_EXPORT2  | 
39  | 0  | uset_open(UChar32 start, UChar32 end) { | 
40  | 0  |     return (USet*) new UnicodeSet(start, end);  | 
41  | 0  | }  | 
42  |  |  | 
43  |  | U_CAPI void U_EXPORT2  | 
44  | 0  | uset_close(USet* set) { | 
45  | 0  |     delete (UnicodeSet*) set;  | 
46  | 0  | }  | 
47  |  |  | 
48  |  | U_CAPI USet * U_EXPORT2  | 
49  | 0  | uset_clone(const USet *set) { | 
50  | 0  |     return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());  | 
51  | 0  | }  | 
52  |  |  | 
53  |  | U_CAPI UBool U_EXPORT2  | 
54  | 0  | uset_isFrozen(const USet *set) { | 
55  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::isFrozen();  | 
56  | 0  | }  | 
57  |  |  | 
58  |  | U_CAPI void U_EXPORT2  | 
59  | 0  | uset_freeze(USet *set) { | 
60  | 0  |     ((UnicodeSet*) set)->UnicodeSet::freeze();  | 
61  | 0  | }  | 
62  |  |  | 
63  |  | U_CAPI USet * U_EXPORT2  | 
64  | 0  | uset_cloneAsThawed(const USet *set) { | 
65  | 0  |     return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());  | 
66  | 0  | }  | 
67  |  |  | 
68  |  | U_CAPI void U_EXPORT2  | 
69  |  | uset_set(USet* set,  | 
70  | 0  |      UChar32 start, UChar32 end) { | 
71  | 0  |     ((UnicodeSet*) set)->UnicodeSet::set(start, end);  | 
72  | 0  | }  | 
73  |  |  | 
74  |  | U_CAPI void U_EXPORT2  | 
75  | 0  | uset_addAll(USet* set, const USet *additionalSet) { | 
76  | 0  |     ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));  | 
77  | 0  | }  | 
78  |  |  | 
79  |  | U_CAPI void U_EXPORT2  | 
80  | 0  | uset_add(USet* set, UChar32 c) { | 
81  | 0  |     ((UnicodeSet*) set)->UnicodeSet::add(c);  | 
82  | 0  | }  | 
83  |  |  | 
84  |  | U_CAPI void U_EXPORT2  | 
85  | 0  | uset_addRange(USet* set, UChar32 start, UChar32 end) { | 
86  | 0  |     ((UnicodeSet*) set)->UnicodeSet::add(start, end);      | 
87  | 0  | }  | 
88  |  |  | 
89  |  | U_CAPI void U_EXPORT2  | 
90  | 0  | uset_addString(USet* set, const UChar* str, int32_t strLen) { | 
91  |  |     // UnicodeString handles -1 for strLen  | 
92  | 0  |     UnicodeString s(strLen<0, str, strLen);  | 
93  | 0  |     ((UnicodeSet*) set)->UnicodeSet::add(s);  | 
94  | 0  | }  | 
95  |  |  | 
96  |  | U_CAPI void U_EXPORT2  | 
97  | 0  | uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) { | 
98  |  |     // UnicodeString handles -1 for strLen  | 
99  | 0  |     UnicodeString s(str, strLen);  | 
100  | 0  |     ((UnicodeSet*) set)->UnicodeSet::addAll(s);  | 
101  | 0  | }  | 
102  |  |  | 
103  |  | U_CAPI void U_EXPORT2  | 
104  | 0  | uset_remove(USet* set, UChar32 c) { | 
105  | 0  |     ((UnicodeSet*) set)->UnicodeSet::remove(c);  | 
106  | 0  | }  | 
107  |  |  | 
108  |  | U_CAPI void U_EXPORT2  | 
109  | 0  | uset_removeRange(USet* set, UChar32 start, UChar32 end) { | 
110  | 0  |     ((UnicodeSet*) set)->UnicodeSet::remove(start, end);  | 
111  | 0  | }  | 
112  |  |  | 
113  |  | U_CAPI void U_EXPORT2  | 
114  | 0  | uset_removeString(USet* set, const UChar* str, int32_t strLen) { | 
115  | 0  |     UnicodeString s(strLen==-1, str, strLen);  | 
116  | 0  |     ((UnicodeSet*) set)->UnicodeSet::remove(s);  | 
117  | 0  | }  | 
118  |  |  | 
119  |  | U_CAPI void U_EXPORT2  | 
120  | 0  | uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) { | 
121  | 0  |     UnicodeString s(length==-1, str, length);  | 
122  | 0  |     ((UnicodeSet*) set)->UnicodeSet::removeAll(s);  | 
123  | 0  | }  | 
124  |  |  | 
125  |  | U_CAPI void U_EXPORT2  | 
126  | 0  | uset_removeAll(USet* set, const USet* remove) { | 
127  | 0  |     ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);  | 
128  | 0  | }  | 
129  |  |  | 
130  |  | U_CAPI void U_EXPORT2  | 
131  | 0  | uset_retain(USet* set, UChar32 start, UChar32 end) { | 
132  | 0  |     ((UnicodeSet*) set)->UnicodeSet::retain(start, end);  | 
133  | 0  | }  | 
134  |  |  | 
135  |  | U_CAPI void U_EXPORT2  | 
136  | 0  | uset_retainString(USet *set, const UChar *str, int32_t length) { | 
137  | 0  |     UnicodeString s(length==-1, str, length);  | 
138  | 0  |     ((UnicodeSet*) set)->UnicodeSet::retain(s);  | 
139  | 0  | }  | 
140  |  |  | 
141  |  | U_CAPI void U_EXPORT2  | 
142  | 0  | uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) { | 
143  | 0  |     UnicodeString s(length==-1, str, length);  | 
144  | 0  |     ((UnicodeSet*) set)->UnicodeSet::retainAll(s);  | 
145  | 0  | }  | 
146  |  |  | 
147  |  | U_CAPI void U_EXPORT2  | 
148  | 0  | uset_retainAll(USet* set, const USet* retain) { | 
149  | 0  |     ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);  | 
150  | 0  | }  | 
151  |  |  | 
152  |  | U_CAPI void U_EXPORT2  | 
153  | 0  | uset_compact(USet* set) { | 
154  | 0  |     ((UnicodeSet*) set)->UnicodeSet::compact();  | 
155  | 0  | }  | 
156  |  |  | 
157  |  | U_CAPI void U_EXPORT2  | 
158  | 0  | uset_complement(USet* set) { | 
159  | 0  |     ((UnicodeSet*) set)->UnicodeSet::complement();  | 
160  | 0  | }  | 
161  |  |  | 
162  |  | U_CAPI void U_EXPORT2  | 
163  | 0  | uset_complementRange(USet *set, UChar32 start, UChar32 end) { | 
164  | 0  |     ((UnicodeSet*) set)->UnicodeSet::complement(start, end);  | 
165  | 0  | }  | 
166  |  |  | 
167  |  | U_CAPI void U_EXPORT2  | 
168  | 0  | uset_complementString(USet *set, const UChar *str, int32_t length) { | 
169  | 0  |     UnicodeString s(length==-1, str, length);  | 
170  | 0  |     ((UnicodeSet*) set)->UnicodeSet::complement(s);  | 
171  | 0  | }  | 
172  |  |  | 
173  |  | U_CAPI void U_EXPORT2  | 
174  | 0  | uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) { | 
175  | 0  |     UnicodeString s(length==-1, str, length);  | 
176  | 0  |     ((UnicodeSet*) set)->UnicodeSet::complementAll(s);  | 
177  | 0  | }  | 
178  |  |  | 
179  |  | U_CAPI void U_EXPORT2  | 
180  | 0  | uset_complementAll(USet* set, const USet* complement) { | 
181  | 0  |     ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);  | 
182  | 0  | }  | 
183  |  |  | 
184  |  | U_CAPI void U_EXPORT2  | 
185  | 0  | uset_clear(USet* set) { | 
186  | 0  |     ((UnicodeSet*) set)->UnicodeSet::clear();  | 
187  | 0  | }  | 
188  |  |  | 
189  |  | U_CAPI void U_EXPORT2  | 
190  | 0  | uset_removeAllStrings(USet* set) { | 
191  | 0  |     ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();  | 
192  | 0  | }  | 
193  |  |  | 
194  |  | U_CAPI UBool U_EXPORT2  | 
195  | 0  | uset_isEmpty(const USet* set) { | 
196  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();  | 
197  | 0  | }  | 
198  |  |  | 
199  |  | U_CAPI UBool U_EXPORT2  | 
200  | 0  | uset_hasStrings(const USet* set) { | 
201  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::hasStrings();  | 
202  | 0  | }  | 
203  |  |  | 
204  |  | U_CAPI UBool U_EXPORT2  | 
205  | 0  | uset_contains(const USet* set, UChar32 c) { | 
206  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::contains(c);  | 
207  | 0  | }  | 
208  |  |  | 
209  |  | U_CAPI UBool U_EXPORT2  | 
210  | 0  | uset_containsRange(const USet* set, UChar32 start, UChar32 end) { | 
211  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);  | 
212  | 0  | }  | 
213  |  |  | 
214  |  | U_CAPI UBool U_EXPORT2  | 
215  | 0  | uset_containsString(const USet* set, const UChar* str, int32_t strLen) { | 
216  | 0  |     UnicodeString s(strLen==-1, str, strLen);  | 
217  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::contains(s);  | 
218  | 0  | }  | 
219  |  |  | 
220  |  | U_CAPI UBool U_EXPORT2  | 
221  | 0  | uset_containsAll(const USet* set1, const USet* set2) { | 
222  | 0  |     return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);  | 
223  | 0  | }  | 
224  |  |  | 
225  |  | U_CAPI UBool U_EXPORT2  | 
226  | 0  | uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) { | 
227  |  |     // Create a string alias, since nothing is being added to the set.  | 
228  | 0  |     UnicodeString s(strLen==-1, str, strLen);  | 
229  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);  | 
230  | 0  | }  | 
231  |  |  | 
232  |  | U_CAPI UBool U_EXPORT2  | 
233  | 0  | uset_containsNone(const USet* set1, const USet* set2) { | 
234  | 0  |     return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);  | 
235  | 0  | }  | 
236  |  |  | 
237  |  | U_CAPI UBool U_EXPORT2  | 
238  | 0  | uset_containsSome(const USet* set1, const USet* set2) { | 
239  | 0  |     return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);  | 
240  | 0  | }  | 
241  |  |  | 
242  |  | U_CAPI int32_t U_EXPORT2  | 
243  | 0  | uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { | 
244  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);  | 
245  | 0  | }  | 
246  |  |  | 
247  |  | U_CAPI int32_t U_EXPORT2  | 
248  | 0  | uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { | 
249  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);  | 
250  | 0  | }  | 
251  |  |  | 
252  |  | U_CAPI int32_t U_EXPORT2  | 
253  | 0  | uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { | 
254  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);  | 
255  | 0  | }  | 
256  |  |  | 
257  |  | U_CAPI int32_t U_EXPORT2  | 
258  | 0  | uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { | 
259  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);  | 
260  | 0  | }  | 
261  |  |  | 
262  |  | U_CAPI UBool U_EXPORT2  | 
263  | 0  | uset_equals(const USet* set1, const USet* set2) { | 
264  | 0  |     return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;  | 
265  | 0  | }  | 
266  |  |  | 
267  |  | U_CAPI int32_t U_EXPORT2  | 
268  | 0  | uset_indexOf(const USet* set, UChar32 c) { | 
269  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);  | 
270  | 0  | }  | 
271  |  |  | 
272  |  | U_CAPI UChar32 U_EXPORT2  | 
273  | 0  | uset_charAt(const USet* set, int32_t index) { | 
274  | 0  |     return ((UnicodeSet*) set)->UnicodeSet::charAt(index);  | 
275  | 0  | }  | 
276  |  |  | 
277  |  | U_CAPI int32_t U_EXPORT2  | 
278  | 0  | uset_size(const USet* set) { | 
279  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::size();  | 
280  | 0  | }  | 
281  |  |  | 
282  |  | U_NAMESPACE_BEGIN  | 
283  |  | /**  | 
284  |  |  * This class only exists to provide access to the UnicodeSet private  | 
285  |  |  * USet support API.  Declaring a class a friend is more portable than  | 
286  |  |  * trying to declare extern "C" functions as friends.  | 
287  |  |  */  | 
288  |  | class USetAccess /* not : public UObject because all methods are static */ { | 
289  |  | public:  | 
290  |  |     /* Try to have the compiler inline these*/  | 
291  | 0  |     inline static int32_t getStringCount(const UnicodeSet& set) { | 
292  | 0  |         return set.stringsSize();  | 
293  | 0  |     }  | 
294  |  |     inline static const UnicodeString* getString(const UnicodeSet& set,  | 
295  | 0  |                                                  int32_t i) { | 
296  | 0  |         return set.getString(i);  | 
297  | 0  |     }  | 
298  |  | private:  | 
299  |  |     /* do not instantiate*/  | 
300  |  |     USetAccess();  | 
301  |  | };  | 
302  |  | U_NAMESPACE_END  | 
303  |  |  | 
304  |  | U_CAPI int32_t U_EXPORT2  | 
305  | 0  | uset_getRangeCount(const USet *set) { | 
306  | 0  |     return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();  | 
307  | 0  | }  | 
308  |  |  | 
309  |  | U_CAPI int32_t U_EXPORT2  | 
310  | 0  | uset_getItemCount(const USet* uset) { | 
311  | 0  |     const UnicodeSet& set = *(const UnicodeSet*)uset;  | 
312  | 0  |     return set.getRangeCount() + USetAccess::getStringCount(set);  | 
313  | 0  | }  | 
314  |  |  | 
315  |  | U_CAPI int32_t U_EXPORT2  | 
316  |  | uset_getItem(const USet* uset, int32_t itemIndex,  | 
317  |  |              UChar32* start, UChar32* end,  | 
318  |  |              UChar* str, int32_t strCapacity,  | 
319  | 0  |              UErrorCode* ec) { | 
320  | 0  |     if (U_FAILURE(*ec)) return 0;  | 
321  | 0  |     const UnicodeSet& set = *(const UnicodeSet*)uset;  | 
322  | 0  |     int32_t rangeCount;  | 
323  |  | 
  | 
324  | 0  |     if (itemIndex < 0) { | 
325  | 0  |         *ec = U_ILLEGAL_ARGUMENT_ERROR;  | 
326  | 0  |         return -1;  | 
327  | 0  |     } else if (itemIndex < (rangeCount = set.getRangeCount())) { | 
328  | 0  |         *start = set.getRangeStart(itemIndex);  | 
329  | 0  |         *end = set.getRangeEnd(itemIndex);  | 
330  | 0  |         return 0;  | 
331  | 0  |     } else { | 
332  | 0  |         itemIndex -= rangeCount;  | 
333  | 0  |         if (itemIndex < USetAccess::getStringCount(set)) { | 
334  | 0  |             const UnicodeString* s = USetAccess::getString(set, itemIndex);  | 
335  | 0  |             return s->extract(str, strCapacity, *ec);  | 
336  | 0  |         } else { | 
337  | 0  |             *ec = U_INDEX_OUTOFBOUNDS_ERROR;  | 
338  | 0  |             return -1;  | 
339  | 0  |         }  | 
340  | 0  |     }  | 
341  | 0  | }  | 
342  |  |  | 
343  |  | //U_CAPI UBool U_EXPORT2  | 
344  |  | //uset_getRange(const USet* set, int32_t rangeIndex,  | 
345  |  | //              UChar32* pStart, UChar32* pEnd) { | 
346  |  | //    if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) { | 
347  |  | //        return FALSE;  | 
348  |  | //    }  | 
349  |  | //    const UnicodeSet* us = (const UnicodeSet*) set;  | 
350  |  | //    *pStart = us->getRangeStart(rangeIndex);  | 
351  |  | //    *pEnd = us->getRangeEnd(rangeIndex);  | 
352  |  | //    return TRUE;  | 
353  |  | //}  | 
354  |  |  | 
355  |  | /*  | 
356  |  |  * Serialize a USet into 16-bit units.  | 
357  |  |  * Store BMP code points as themselves with one 16-bit unit each.  | 
358  |  |  *  | 
359  |  |  * Important: the code points in the array are in ascending order,  | 
360  |  |  * therefore all BMP code points precede all supplementary code points.  | 
361  |  |  *  | 
362  |  |  * Store each supplementary code point in 2 16-bit units,  | 
363  |  |  * simply with higher-then-lower 16-bit halves.  | 
364  |  |  *  | 
365  |  |  * Precede the entire list with the length.  | 
366  |  |  * If there are supplementary code points, then set bit 15 in the length  | 
367  |  |  * and add the bmpLength between it and the array.  | 
368  |  |  *  | 
369  |  |  * In other words:  | 
370  |  |  * - all BMP:            (length=bmpLength) BMP, .., BMP  | 
371  |  |  * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..  | 
372  |  |  */  | 
373  |  | U_CAPI int32_t U_EXPORT2  | 
374  | 0  | uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) { | 
375  | 0  |     if (ec==NULL || U_FAILURE(*ec)) { | 
376  | 0  |         return 0;  | 
377  | 0  |     }  | 
378  |  |  | 
379  | 0  |     return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);  | 
380  | 0  | }  | 
381  |  |  | 
382  |  | U_CAPI UBool U_EXPORT2  | 
383  | 0  | uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) { | 
384  | 0  |     int32_t length;  | 
385  |  | 
  | 
386  | 0  |     if(fillSet==NULL) { | 
387  | 0  |         return FALSE;  | 
388  | 0  |     }  | 
389  | 0  |     if(src==NULL || srcLength<=0) { | 
390  | 0  |         fillSet->length=fillSet->bmpLength=0;  | 
391  | 0  |         return FALSE;  | 
392  | 0  |     }  | 
393  |  |  | 
394  | 0  |     length=*src++;  | 
395  | 0  |     if(length&0x8000) { | 
396  |  |         /* there are supplementary values */  | 
397  | 0  |         length&=0x7fff;  | 
398  | 0  |         if(srcLength<(2+length)) { | 
399  | 0  |             fillSet->length=fillSet->bmpLength=0;  | 
400  | 0  |             return FALSE;  | 
401  | 0  |         }  | 
402  | 0  |         fillSet->bmpLength=*src++;  | 
403  | 0  |     } else { | 
404  |  |         /* only BMP values */  | 
405  | 0  |         if(srcLength<(1+length)) { | 
406  | 0  |             fillSet->length=fillSet->bmpLength=0;  | 
407  | 0  |             return FALSE;  | 
408  | 0  |         }  | 
409  | 0  |         fillSet->bmpLength=length;  | 
410  | 0  |     }  | 
411  | 0  |     fillSet->array=src;  | 
412  | 0  |     fillSet->length=length;  | 
413  | 0  |     return TRUE;  | 
414  | 0  | }  | 
415  |  |  | 
416  |  | U_CAPI void U_EXPORT2  | 
417  | 0  | uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) { | 
418  | 0  |     if(fillSet==NULL || (uint32_t)c>0x10ffff) { | 
419  | 0  |         return;  | 
420  | 0  |     }  | 
421  |  |  | 
422  | 0  |     fillSet->array=fillSet->staticArray;  | 
423  | 0  |     if(c<0xffff) { | 
424  | 0  |         fillSet->bmpLength=fillSet->length=2;  | 
425  | 0  |         fillSet->staticArray[0]=(uint16_t)c;  | 
426  | 0  |         fillSet->staticArray[1]=(uint16_t)c+1;  | 
427  | 0  |     } else if(c==0xffff) { | 
428  | 0  |         fillSet->bmpLength=1;  | 
429  | 0  |         fillSet->length=3;  | 
430  | 0  |         fillSet->staticArray[0]=0xffff;  | 
431  | 0  |         fillSet->staticArray[1]=1;  | 
432  | 0  |         fillSet->staticArray[2]=0;  | 
433  | 0  |     } else if(c<0x10ffff) { | 
434  | 0  |         fillSet->bmpLength=0;  | 
435  | 0  |         fillSet->length=4;  | 
436  | 0  |         fillSet->staticArray[0]=(uint16_t)(c>>16);  | 
437  | 0  |         fillSet->staticArray[1]=(uint16_t)c;  | 
438  | 0  |         ++c;  | 
439  | 0  |         fillSet->staticArray[2]=(uint16_t)(c>>16);  | 
440  | 0  |         fillSet->staticArray[3]=(uint16_t)c;  | 
441  | 0  |     } else /* c==0x10ffff */ { | 
442  | 0  |         fillSet->bmpLength=0;  | 
443  | 0  |         fillSet->length=2;  | 
444  | 0  |         fillSet->staticArray[0]=0x10;  | 
445  | 0  |         fillSet->staticArray[1]=0xffff;  | 
446  | 0  |     }  | 
447  | 0  | }  | 
448  |  |  | 
449  |  | U_CAPI UBool U_EXPORT2  | 
450  | 0  | uset_serializedContains(const USerializedSet* set, UChar32 c) { | 
451  | 0  |     const uint16_t* array;  | 
452  |  | 
  | 
453  | 0  |     if(set==NULL || (uint32_t)c>0x10ffff) { | 
454  | 0  |         return FALSE;  | 
455  | 0  |     }  | 
456  |  |  | 
457  | 0  |     array=set->array;  | 
458  | 0  |     if(c<=0xffff) { | 
459  |  |         /* find c in the BMP part */  | 
460  | 0  |         int32_t lo = 0;  | 
461  | 0  |         int32_t hi = set->bmpLength-1;  | 
462  | 0  |         if (c < array[0]) { | 
463  | 0  |             hi = 0;  | 
464  | 0  |         } else if (c < array[hi]) { | 
465  | 0  |             for(;;) { | 
466  | 0  |                 int32_t i = (lo + hi) >> 1;  | 
467  | 0  |                 if (i == lo) { | 
468  | 0  |                     break;  // Done!  | 
469  | 0  |                 } else if (c < array[i]) { | 
470  | 0  |                     hi = i;  | 
471  | 0  |                 } else { | 
472  | 0  |                     lo = i;  | 
473  | 0  |                 }  | 
474  | 0  |             }  | 
475  | 0  |         } else { | 
476  | 0  |             hi += 1;  | 
477  | 0  |         }  | 
478  | 0  |         return (UBool)(hi&1);  | 
479  | 0  |     } else { | 
480  |  |         /* find c in the supplementary part */  | 
481  | 0  |         uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;  | 
482  | 0  |         int32_t base = set->bmpLength;  | 
483  | 0  |         int32_t lo = 0;  | 
484  | 0  |         int32_t hi = set->length - 2 - base;  | 
485  | 0  |         if (high < array[base] || (high==array[base] && low<array[base+1])) { | 
486  | 0  |             hi = 0;  | 
487  | 0  |         } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) { | 
488  | 0  |             for (;;) { | 
489  | 0  |                 int32_t i = ((lo + hi) >> 1) & ~1;  // Guarantee even result  | 
490  | 0  |                 int32_t iabs = i + base;  | 
491  | 0  |                 if (i == lo) { | 
492  | 0  |                     break;  // Done!  | 
493  | 0  |                 } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) { | 
494  | 0  |                     hi = i;  | 
495  | 0  |                 } else { | 
496  | 0  |                     lo = i;  | 
497  | 0  |                 }  | 
498  | 0  |             }  | 
499  | 0  |         } else { | 
500  | 0  |             hi += 2;  | 
501  | 0  |         }  | 
502  |  |         /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */  | 
503  | 0  |         return (UBool)(((hi+(base<<1))&2)!=0);  | 
504  | 0  |     }  | 
505  | 0  | }  | 
506  |  |  | 
507  |  | U_CAPI int32_t U_EXPORT2  | 
508  | 0  | uset_getSerializedRangeCount(const USerializedSet* set) { | 
509  | 0  |     if(set==NULL) { | 
510  | 0  |         return 0;  | 
511  | 0  |     }  | 
512  |  |  | 
513  | 0  |     return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;  | 
514  | 0  | }  | 
515  |  |  | 
516  |  | U_CAPI UBool U_EXPORT2  | 
517  |  | uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,  | 
518  | 0  |                         UChar32* pStart, UChar32* pEnd) { | 
519  | 0  |     const uint16_t* array;  | 
520  | 0  |     int32_t bmpLength, length;  | 
521  |  | 
  | 
522  | 0  |     if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) { | 
523  | 0  |         return FALSE;  | 
524  | 0  |     }  | 
525  |  |  | 
526  | 0  |     array=set->array;  | 
527  | 0  |     length=set->length;  | 
528  | 0  |     bmpLength=set->bmpLength;  | 
529  |  | 
  | 
530  | 0  |     rangeIndex*=2; /* address start/limit pairs */  | 
531  | 0  |     if(rangeIndex<bmpLength) { | 
532  | 0  |         *pStart=array[rangeIndex++];  | 
533  | 0  |         if(rangeIndex<bmpLength) { | 
534  | 0  |             *pEnd=array[rangeIndex]-1;  | 
535  | 0  |         } else if(rangeIndex<length) { | 
536  | 0  |             *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;  | 
537  | 0  |         } else { | 
538  | 0  |             *pEnd=0x10ffff;  | 
539  | 0  |         }  | 
540  | 0  |         return TRUE;  | 
541  | 0  |     } else { | 
542  | 0  |         rangeIndex-=bmpLength;  | 
543  | 0  |         rangeIndex*=2; /* address pairs of pairs of units */  | 
544  | 0  |         length-=bmpLength;  | 
545  | 0  |         if(rangeIndex<length) { | 
546  | 0  |             array+=bmpLength;  | 
547  | 0  |             *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];  | 
548  | 0  |             rangeIndex+=2;  | 
549  | 0  |             if(rangeIndex<length) { | 
550  | 0  |                 *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;  | 
551  | 0  |             } else { | 
552  | 0  |                 *pEnd=0x10ffff;  | 
553  | 0  |             }  | 
554  | 0  |             return TRUE;  | 
555  | 0  |         } else { | 
556  | 0  |             return FALSE;  | 
557  | 0  |         }  | 
558  | 0  |     }  | 
559  | 0  | }  | 
560  |  |  | 
561  |  | // TODO The old, internal uset.c had an efficient uset_containsOne function.  | 
562  |  | // Returned the one and only code point, or else -1 or something.  | 
563  |  | // Consider adding such a function to both C and C++ UnicodeSet/uset.  | 
564  |  | // See tools/gennorm/store.c for usage, now usetContainsOne there.  | 
565  |  |  | 
566  |  | // TODO Investigate incorporating this code into UnicodeSet to improve  | 
567  |  | // efficiency.  | 
568  |  | // ---  | 
569  |  | // #define USET_GROW_DELTA 20  | 
570  |  | //   | 
571  |  | // static int32_t  | 
572  |  | // findChar(const UChar32* array, int32_t length, UChar32 c) { | 
573  |  | //     int32_t i;  | 
574  |  | //   | 
575  |  | //     /* check the last range limit first for more efficient appending */  | 
576  |  | //     if(length>0) { | 
577  |  | //         if(c>=array[length-1]) { | 
578  |  | //             return length;  | 
579  |  | //         }  | 
580  |  | //   | 
581  |  | //         /* do not check the last range limit again in the loop below */  | 
582  |  | //         --length;  | 
583  |  | //     }  | 
584  |  | //   | 
585  |  | //     for(i=0; i<length && c>=array[i]; ++i) {} | 
586  |  | //     return i;  | 
587  |  | // }  | 
588  |  | //   | 
589  |  | // static UBool  | 
590  |  | // addRemove(USet* set, UChar32 c, int32_t doRemove) { | 
591  |  | //     int32_t i, length, more;  | 
592  |  | //   | 
593  |  | //     if(set==NULL || (uint32_t)c>0x10ffff) { | 
594  |  | //         return FALSE;  | 
595  |  | //     }  | 
596  |  | //   | 
597  |  | //     length=set->length;  | 
598  |  | //     i=findChar(set->array, length, c);  | 
599  |  | //     if((i&1)^doRemove) { | 
600  |  | //         /* c is already in the set */  | 
601  |  | //         return TRUE;  | 
602  |  | //     }  | 
603  |  | //   | 
604  |  | //     /* how many more array items do we need? */  | 
605  |  | //     if(i<length && (c+1)==set->array[i]) { | 
606  |  | //         /* c is just before the following range, extend that in-place by one */  | 
607  |  | //         set->array[i]=c;  | 
608  |  | //         if(i>0) { | 
609  |  | //             --i;  | 
610  |  | //             if(c==set->array[i]) { | 
611  |  | //                 /* the previous range collapsed, remove it */  | 
612  |  | //                 set->length=length-=2;  | 
613  |  | //                 if(i<length) { | 
614  |  | //                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);  | 
615  |  | //                 }  | 
616  |  | //             }  | 
617  |  | //         }  | 
618  |  | //         return TRUE;  | 
619  |  | //     } else if(i>0 && c==set->array[i-1]) { | 
620  |  | //         /* c is just after the previous range, extend that in-place by one */  | 
621  |  | //         if(++c<=0x10ffff) { | 
622  |  | //             set->array[i-1]=c;  | 
623  |  | //             if(i<length && c==set->array[i]) { | 
624  |  | //                 /* the following range collapsed, remove it */  | 
625  |  | //                 --i;  | 
626  |  | //                 set->length=length-=2;  | 
627  |  | //                 if(i<length) { | 
628  |  | //                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);  | 
629  |  | //                 }  | 
630  |  | //             }  | 
631  |  | //         } else { | 
632  |  | //             /* extend the previous range (had limit 0x10ffff) to the end of Unicode */  | 
633  |  | //             set->length=i-1;  | 
634  |  | //         }  | 
635  |  | //         return TRUE;  | 
636  |  | //     } else if(i==length && c==0x10ffff) { | 
637  |  | //         /* insert one range limit c */  | 
638  |  | //         more=1;  | 
639  |  | //     } else { | 
640  |  | //         /* insert two range limits c, c+1 */  | 
641  |  | //         more=2;  | 
642  |  | //     }  | 
643  |  | //   | 
644  |  | //     /* insert <more> range limits */  | 
645  |  | //     if(length+more>set->capacity) { | 
646  |  | //         /* reallocate */  | 
647  |  | //         int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;  | 
648  |  | //         UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);  | 
649  |  | //         if(newArray==NULL) { | 
650  |  | //             return FALSE;  | 
651  |  | //         }  | 
652  |  | //         set->capacity=newCapacity;  | 
653  |  | //         uprv_memcpy(newArray, set->array, length*4);  | 
654  |  | //   | 
655  |  | //         if(set->array!=set->staticBuffer) { | 
656  |  | //             uprv_free(set->array);  | 
657  |  | //         }  | 
658  |  | //         set->array=newArray;  | 
659  |  | //     }  | 
660  |  | //   | 
661  |  | //     if(i<length) { | 
662  |  | //         uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);  | 
663  |  | //     }  | 
664  |  | //     set->array[i]=c;  | 
665  |  | //     if(more==2) { | 
666  |  | //         set->array[i+1]=c+1;  | 
667  |  | //     }  | 
668  |  | //     set->length+=more;  | 
669  |  | //   | 
670  |  | //     return TRUE;  | 
671  |  | // }  | 
672  |  | //   | 
673  |  | // U_CAPI UBool U_EXPORT2  | 
674  |  | // uset_add(USet* set, UChar32 c) { | 
675  |  | //     return addRemove(set, c, 0);  | 
676  |  | // }  | 
677  |  | //   | 
678  |  | // U_CAPI void U_EXPORT2  | 
679  |  | // uset_remove(USet* set, UChar32 c) { | 
680  |  | //     addRemove(set, c, 1);  | 
681  |  | // }  |