/src/node/deps/icu-small/source/common/unicode/uset.h
Line  | Count  | Source  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2002-2014, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  uset.h  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2002mar07  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | *  | 
18  |  | *   C version of UnicodeSet.  | 
19  |  | */  | 
20  |  |  | 
21  |  |  | 
22  |  | /**  | 
23  |  |  * \file  | 
24  |  |  * \brief C API: Unicode Set  | 
25  |  |  *  | 
26  |  |  * <p>This is a C wrapper around the C++ UnicodeSet class.</p>  | 
27  |  |  */  | 
28  |  |  | 
29  |  | #ifndef __USET_H__  | 
30  |  | #define __USET_H__  | 
31  |  |  | 
32  |  | #include "unicode/utypes.h"  | 
33  |  | #include "unicode/uchar.h"  | 
34  |  |  | 
35  |  | #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API  | 
36  |  | #include <string>  | 
37  |  | #include <string_view>  | 
38  |  | #include "unicode/char16ptr.h"  | 
39  |  | #include "unicode/localpointer.h"  | 
40  |  | #include "unicode/utf16.h"  | 
41  |  | #endif  | 
42  |  |  | 
43  |  | #ifndef USET_DEFINED  | 
44  |  |  | 
45  |  | #ifndef U_IN_DOXYGEN  | 
46  |  | #define USET_DEFINED  | 
47  |  | #endif  | 
48  |  | /**  | 
49  |  |  * USet is the C API type corresponding to C++ class UnicodeSet.  | 
50  |  |  * Use the uset_* API to manipulate.  Create with  | 
51  |  |  * uset_open*, and destroy with uset_close.  | 
52  |  |  * @stable ICU 2.4  | 
53  |  |  */  | 
54  |  | typedef struct USet USet;  | 
55  |  | #endif  | 
56  |  |  | 
57  |  | /**  | 
58  |  |  * Bitmask values to be passed to uset_openPatternOptions() or  | 
59  |  |  * uset_applyPattern() taking an option parameter.  | 
60  |  |  *  | 
61  |  |  * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.  | 
62  |  |  * These case options are mutually exclusive.  | 
63  |  |  *  | 
64  |  |  * Undefined option bits are ignored and are reserved for future use.  | 
65  |  |  *  | 
66  |  |  * @stable ICU 2.4  | 
67  |  |  */  | 
68  |  | enum { | 
69  |  |     /**  | 
70  |  |      * Ignore white space within patterns unless quoted or escaped.  | 
71  |  |      * @stable ICU 2.4  | 
72  |  |      */  | 
73  |  |     USET_IGNORE_SPACE = 1,  | 
74  |  |  | 
75  |  |     /**  | 
76  |  |      * Enable case insensitive matching.  E.g., "[ab]" with this flag  | 
77  |  |      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will  | 
78  |  |      * match all except 'a', 'A', 'b', and 'B'. This performs a full  | 
79  |  |      * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.  | 
80  |  |      *  | 
81  |  |      * The resulting set is a superset of the input for the code points but  | 
82  |  |      * not for the strings.  | 
83  |  |      * It performs a case mapping closure of the code points and adds  | 
84  |  |      * full case folding strings for the code points, and reduces strings of  | 
85  |  |      * the original set to their full case folding equivalents.  | 
86  |  |      *  | 
87  |  |      * This is designed for case-insensitive matches, for example  | 
88  |  |      * in regular expressions. The full code point case closure allows checking of  | 
89  |  |      * an input character directly against the closure set.  | 
90  |  |      * Strings are matched by comparing the case-folded form from the closure  | 
91  |  |      * set with an incremental case folding of the string in question.  | 
92  |  |      *  | 
93  |  |      * The closure set will also contain single code points if the original  | 
94  |  |      * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).  | 
95  |  |      * This is not necessary (that is, redundant) for the above matching method  | 
96  |  |      * but results in the same closure sets regardless of whether the original  | 
97  |  |      * set contained the code point or a string.  | 
98  |  |      *  | 
99  |  |      * @stable ICU 2.4  | 
100  |  |      */  | 
101  |  |     USET_CASE_INSENSITIVE = 2,  | 
102  |  |  | 
103  |  |     /**  | 
104  |  |      * Adds all case mappings for each element in the set.  | 
105  |  |      * This adds the full lower-, title-, and uppercase mappings as well as the full case folding  | 
106  |  |      * of each existing element in the set.  | 
107  |  |      *  | 
108  |  |      * Unlike the “case insensitive” options, this does not perform a closure.  | 
109  |  |      * For example, it does not add 'ſ' (U+017F long s) for 's',  | 
110  |  |      * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.  | 
111  |  |      *  | 
112  |  |      * @stable ICU 3.2  | 
113  |  |      */  | 
114  |  |     USET_ADD_CASE_MAPPINGS = 4,  | 
115  |  |  | 
116  |  |     /**  | 
117  |  |      * Enable case insensitive matching.  | 
118  |  |      * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,  | 
119  |  |      * which map each code point to one code point,  | 
120  |  |      * not full Case_Folding (cf) mappings, which map some code points to multiple code points.  | 
121  |  |      *  | 
122  |  |      * This is designed for case-insensitive matches, for example in certain  | 
123  |  |      * regular expression implementations where only Simple_Case_Folding mappings are used,  | 
124  |  |      * such as in ECMAScript (JavaScript) regular expressions.  | 
125  |  |      *  | 
126  |  |      * @stable ICU 73  | 
127  |  |      */  | 
128  |  |     USET_SIMPLE_CASE_INSENSITIVE = 6  | 
129  |  | };  | 
130  |  |  | 
131  |  | /**  | 
132  |  |  * Argument values for whether span() and similar functions continue while  | 
133  |  |  * the current character is contained vs. not contained in the set.  | 
134  |  |  *  | 
135  |  |  * The functionality is straightforward for sets with only single code points,  | 
136  |  |  * without strings (which is the common case):  | 
137  |  |  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.  | 
138  |  |  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.  | 
139  |  |  * - span() and spanBack() partition any string the same way when  | 
140  |  |  *   alternating between span(USET_SPAN_NOT_CONTAINED) and  | 
141  |  |  *   span(either "contained" condition).  | 
142  |  |  * - Using a complemented (inverted) set and the opposite span conditions  | 
143  |  |  *   yields the same results.  | 
144  |  |  *  | 
145  |  |  * When a set contains multi-code point strings, then these statements may not  | 
146  |  |  * be true, depending on the strings in the set (for example, whether they  | 
147  |  |  * overlap with each other) and the string that is processed.  | 
148  |  |  * For a set with strings:  | 
149  |  |  * - The complement of the set contains the opposite set of code points,  | 
150  |  |  *   but the same set of strings.  | 
151  |  |  *   Therefore, complementing both the set and the span conditions  | 
152  |  |  *   may yield different results.  | 
153  |  |  * - When starting spans at different positions in a string  | 
154  |  |  *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different  | 
155  |  |  *   because a set string may start before the later position.  | 
156  |  |  * - span(USET_SPAN_SIMPLE) may be shorter than  | 
157  |  |  *   span(USET_SPAN_CONTAINED) because it will not recursively try  | 
158  |  |  *   all possible paths.  | 
159  |  |  *   For example, with a set which contains the three strings "xy", "xya" and "ax",  | 
160  |  |  *   span("xyax", USET_SPAN_CONTAINED) will return 4 but | 
161  |  |  *   span("xyax", USET_SPAN_SIMPLE) will return 3. | 
162  |  |  *   span(USET_SPAN_SIMPLE) will never be longer than  | 
163  |  |  *   span(USET_SPAN_CONTAINED).  | 
164  |  |  * - With either "contained" condition, span() and spanBack() may partition  | 
165  |  |  *   a string in different ways.  | 
166  |  |  *   For example, with a set which contains the two strings "ab" and "ba",  | 
167  |  |  *   and when processing the string "aba",  | 
168  |  |  *   span() will yield contained/not-contained boundaries of { 0, 2, 3 } | 
169  |  |  *   while spanBack() will yield boundaries of { 0, 1, 3 }. | 
170  |  |  *  | 
171  |  |  * Note: If it is important to get the same boundaries whether iterating forward  | 
172  |  |  * or backward through a string, then either only span() should be used and  | 
173  |  |  * the boundaries cached for backward operation, or an ICU BreakIterator  | 
174  |  |  * could be used.  | 
175  |  |  *  | 
176  |  |  * Note: Unpaired surrogates are treated like surrogate code points.  | 
177  |  |  * Similarly, set strings match only on code point boundaries,  | 
178  |  |  * never in the middle of a surrogate pair.  | 
179  |  |  * Illegal UTF-8 sequences are treated like U+FFFD.  | 
180  |  |  * When processing UTF-8 strings, malformed set strings  | 
181  |  |  * (strings with unpaired surrogates which cannot be converted to UTF-8)  | 
182  |  |  * are ignored.  | 
183  |  |  *  | 
184  |  |  * @stable ICU 3.8  | 
185  |  |  */  | 
186  |  | typedef enum USetSpanCondition { | 
187  |  |     /**  | 
188  |  |      * Continues a span() while there is no set element at the current position.  | 
189  |  |      * Increments by one code point at a time.  | 
190  |  |      * Stops before the first set element (character or string).  | 
191  |  |      * (For code points only, this is like while contains(current)==false).  | 
192  |  |      *  | 
193  |  |      * When span() returns, the substring between where it started and the position  | 
194  |  |      * it returned consists only of characters that are not in the set,  | 
195  |  |      * and none of its strings overlap with the span.  | 
196  |  |      *  | 
197  |  |      * @stable ICU 3.8  | 
198  |  |      */  | 
199  |  |     USET_SPAN_NOT_CONTAINED = 0,  | 
200  |  |     /**  | 
201  |  |      * Spans the longest substring that is a concatenation of set elements (characters or strings).  | 
202  |  |      * (For characters only, this is like while contains(current)==true).  | 
203  |  |      *  | 
204  |  |      * When span() returns, the substring between where it started and the position  | 
205  |  |      * it returned consists only of set elements (characters or strings) that are in the set.  | 
206  |  |      *  | 
207  |  |      * If a set contains strings, then the span will be the longest substring for which there  | 
208  |  |      * exists at least one non-overlapping concatenation of set elements (characters or strings).  | 
209  |  |      * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.  | 
210  |  |      * (Java/ICU/Perl regex stops at the first match of an OR.)  | 
211  |  |      *  | 
212  |  |      * @stable ICU 3.8  | 
213  |  |      */  | 
214  |  |     USET_SPAN_CONTAINED = 1,  | 
215  |  |     /**  | 
216  |  |      * Continues a span() while there is a set element at the current position.  | 
217  |  |      * Increments by the longest matching element at each position.  | 
218  |  |      * (For characters only, this is like while contains(current)==true).  | 
219  |  |      *  | 
220  |  |      * When span() returns, the substring between where it started and the position  | 
221  |  |      * it returned consists only of set elements (characters or strings) that are in the set.  | 
222  |  |      *  | 
223  |  |      * If a set only contains single characters, then this is the same  | 
224  |  |      * as USET_SPAN_CONTAINED.  | 
225  |  |      *  | 
226  |  |      * If a set contains strings, then the span will be the longest substring  | 
227  |  |      * with a match at each position with the longest single set element (character or string).  | 
228  |  |      *  | 
229  |  |      * Use this span condition together with other longest-match algorithms,  | 
230  |  |      * such as ICU converters (ucnv_getUnicodeSet()).  | 
231  |  |      *  | 
232  |  |      * @stable ICU 3.8  | 
233  |  |      */  | 
234  |  |     USET_SPAN_SIMPLE = 2,  | 
235  |  | #ifndef U_HIDE_DEPRECATED_API  | 
236  |  |     /**  | 
237  |  |      * One more than the last span condition.  | 
238  |  |      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.  | 
239  |  |      */  | 
240  |  |     USET_SPAN_CONDITION_COUNT  | 
241  |  | #endif  // U_HIDE_DEPRECATED_API  | 
242  |  | } USetSpanCondition;  | 
243  |  |  | 
244  |  | enum { | 
245  |  |     /**  | 
246  |  |      * Capacity of USerializedSet::staticArray.  | 
247  |  |      * Enough for any single-code point set.  | 
248  |  |      * Also provides padding for nice sizeof(USerializedSet).  | 
249  |  |      * @stable ICU 2.4  | 
250  |  |      */  | 
251  |  |     USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8  | 
252  |  | };  | 
253  |  |  | 
254  |  | /**  | 
255  |  |  * A serialized form of a Unicode set.  Limited manipulations are  | 
256  |  |  * possible directly on a serialized set.  See below.  | 
257  |  |  * @stable ICU 2.4  | 
258  |  |  */  | 
259  |  | typedef struct USerializedSet { | 
260  |  |     /**  | 
261  |  |      * The serialized Unicode Set.  | 
262  |  |      * @stable ICU 2.4  | 
263  |  |      */  | 
264  |  |     const uint16_t *array;  | 
265  |  |     /**  | 
266  |  |      * The length of the array that contains BMP characters.  | 
267  |  |      * @stable ICU 2.4  | 
268  |  |      */  | 
269  |  |     int32_t bmpLength;  | 
270  |  |     /**  | 
271  |  |      * The total length of the array.  | 
272  |  |      * @stable ICU 2.4  | 
273  |  |      */  | 
274  |  |     int32_t length;  | 
275  |  |     /**  | 
276  |  |      * A small buffer for the array to reduce memory allocations.  | 
277  |  |      * @stable ICU 2.4  | 
278  |  |      */  | 
279  |  |     uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];  | 
280  |  | } USerializedSet;  | 
281  |  |  | 
282  |  | /*********************************************************************  | 
283  |  |  * USet API  | 
284  |  |  *********************************************************************/  | 
285  |  |  | 
286  |  | /**  | 
287  |  |  * Create an empty USet object.  | 
288  |  |  * Equivalent to uset_open(1, 0).  | 
289  |  |  * @return a newly created USet.  The caller must call uset_close() on  | 
290  |  |  * it when done.  | 
291  |  |  * @stable ICU 4.2  | 
292  |  |  */  | 
293  |  | U_CAPI USet* U_EXPORT2  | 
294  |  | uset_openEmpty(void);  | 
295  |  |  | 
296  |  | /**  | 
297  |  |  * Creates a USet object that contains the range of characters  | 
298  |  |  * start..end, inclusive.  If <code>start > end</code>   | 
299  |  |  * then an empty set is created (same as using uset_openEmpty()).  | 
300  |  |  * @param start first character of the range, inclusive  | 
301  |  |  * @param end last character of the range, inclusive  | 
302  |  |  * @return a newly created USet.  The caller must call uset_close() on  | 
303  |  |  * it when done.  | 
304  |  |  * @stable ICU 2.4  | 
305  |  |  */  | 
306  |  | U_CAPI USet* U_EXPORT2  | 
307  |  | uset_open(UChar32 start, UChar32 end);  | 
308  |  |  | 
309  |  | /**  | 
310  |  |  * Creates a set from the given pattern.  See the UnicodeSet class  | 
311  |  |  * description for the syntax of the pattern language.  | 
312  |  |  * @param pattern a string specifying what characters are in the set  | 
313  |  |  * @param patternLength the length of the pattern, or -1 if null  | 
314  |  |  * terminated  | 
315  |  |  * @param ec the error code  | 
316  |  |  * @stable ICU 2.4  | 
317  |  |  */  | 
318  |  | U_CAPI USet* U_EXPORT2  | 
319  |  | uset_openPattern(const UChar* pattern, int32_t patternLength,  | 
320  |  |                  UErrorCode* ec);  | 
321  |  |  | 
322  |  | /**  | 
323  |  |  * Creates a set from the given pattern.  See the UnicodeSet class  | 
324  |  |  * description for the syntax of the pattern language.  | 
325  |  |  * @param pattern a string specifying what characters are in the set  | 
326  |  |  * @param patternLength the length of the pattern, or -1 if null  | 
327  |  |  * terminated  | 
328  |  |  * @param options bitmask for options to apply to the pattern.  | 
329  |  |  * Valid options are USET_IGNORE_SPACE and  | 
330  |  |  * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.  | 
331  |  |  * These case options are mutually exclusive.  | 
332  |  |  * @param ec the error code  | 
333  |  |  * @stable ICU 2.4  | 
334  |  |  */  | 
335  |  | U_CAPI USet* U_EXPORT2  | 
336  |  | uset_openPatternOptions(const UChar* pattern, int32_t patternLength,  | 
337  |  |                  uint32_t options,  | 
338  |  |                  UErrorCode* ec);  | 
339  |  |  | 
340  |  | /**  | 
341  |  |  * Disposes of the storage used by a USet object.  This function should  | 
342  |  |  * be called exactly once for objects returned by uset_open().  | 
343  |  |  * @param set the object to dispose of  | 
344  |  |  * @stable ICU 2.4  | 
345  |  |  */  | 
346  |  | U_CAPI void U_EXPORT2  | 
347  |  | uset_close(USet* set);  | 
348  |  |  | 
349  |  | #if U_SHOW_CPLUSPLUS_API  | 
350  |  |  | 
351  |  | U_NAMESPACE_BEGIN  | 
352  |  |  | 
353  |  | /**  | 
354  |  |  * \class LocalUSetPointer  | 
355  |  |  * "Smart pointer" class, closes a USet via uset_close().  | 
356  |  |  * For most methods see the LocalPointerBase base class.  | 
357  |  |  *  | 
358  |  |  * @see LocalPointerBase  | 
359  |  |  * @see LocalPointer  | 
360  |  |  * @stable ICU 4.4  | 
361  |  |  */  | 
362  |  | U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);  | 
363  |  |  | 
364  |  | U_NAMESPACE_END  | 
365  |  |  | 
366  |  | #endif  | 
367  |  |  | 
368  |  | /**  | 
369  |  |  * Returns a copy of this object.  | 
370  |  |  * If this set is frozen, then the clone will be frozen as well.  | 
371  |  |  * Use uset_cloneAsThawed() for a mutable clone of a frozen set.  | 
372  |  |  * @param set the original set  | 
373  |  |  * @return the newly allocated copy of the set  | 
374  |  |  * @see uset_cloneAsThawed  | 
375  |  |  * @stable ICU 3.8  | 
376  |  |  */  | 
377  |  | U_CAPI USet * U_EXPORT2  | 
378  |  | uset_clone(const USet *set);  | 
379  |  |  | 
380  |  | /**  | 
381  |  |  * Determines whether the set has been frozen (made immutable) or not.  | 
382  |  |  * See the ICU4J Freezable interface for details.  | 
383  |  |  * @param set the set  | 
384  |  |  * @return true/false for whether the set has been frozen  | 
385  |  |  * @see uset_freeze  | 
386  |  |  * @see uset_cloneAsThawed  | 
387  |  |  * @stable ICU 3.8  | 
388  |  |  */  | 
389  |  | U_CAPI UBool U_EXPORT2  | 
390  |  | uset_isFrozen(const USet *set);  | 
391  |  |  | 
392  |  | /**  | 
393  |  |  * Freeze the set (make it immutable).  | 
394  |  |  * Once frozen, it cannot be unfrozen and is therefore thread-safe  | 
395  |  |  * until it is deleted.  | 
396  |  |  * See the ICU4J Freezable interface for details.  | 
397  |  |  * Freezing the set may also make some operations faster, for example  | 
398  |  |  * uset_contains() and uset_span().  | 
399  |  |  * A frozen set will not be modified. (It remains frozen.)  | 
400  |  |  * @param set the set to freeze  | 
401  |  |  * The set is frozen in place; this function returns no value.  | 
402  |  |  * @see uset_isFrozen  | 
403  |  |  * @see uset_cloneAsThawed  | 
404  |  |  * @stable ICU 3.8  | 
405  |  |  */  | 
406  |  | U_CAPI void U_EXPORT2  | 
407  |  | uset_freeze(USet *set);  | 
408  |  |  | 
409  |  | /**  | 
410  |  |  * Clone the set and make the clone mutable.  | 
411  |  |  * See the ICU4J Freezable interface for details.  | 
412  |  |  * @param set the set  | 
413  |  |  * @return the mutable clone  | 
414  |  |  * @see uset_freeze  | 
415  |  |  * @see uset_isFrozen  | 
416  |  |  * @see uset_clone  | 
417  |  |  * @stable ICU 3.8  | 
418  |  |  */  | 
419  |  | U_CAPI USet * U_EXPORT2  | 
420  |  | uset_cloneAsThawed(const USet *set);  | 
421  |  |  | 
422  |  | /**  | 
423  |  |  * Causes the USet object to represent the range <code>start - end</code>.  | 
424  |  |  * If <code>start > end</code> then this USet is set to an empty range.  | 
425  |  |  * A frozen set will not be modified.  | 
426  |  |  * @param set the object to set to the given range  | 
427  |  |  * @param start first character in the set, inclusive  | 
428  |  |  * @param end last character in the set, inclusive  | 
429  |  |  * @stable ICU 3.2  | 
430  |  |  */  | 
431  |  | U_CAPI void U_EXPORT2  | 
432  |  | uset_set(USet* set,  | 
433  |  |          UChar32 start, UChar32 end);  | 
434  |  |  | 
435  |  | /**  | 
436  |  |  * Modifies the set to represent the set specified by the given  | 
437  |  |  * pattern. See the UnicodeSet class description for the syntax of   | 
438  |  |  * the pattern language. See also the User Guide chapter about UnicodeSet.  | 
439  |  |  * <em>Empties the set passed before applying the pattern.</em>  | 
440  |  |  * A frozen set will not be modified.  | 
441  |  |  * @param set               The set to which the pattern is to be applied.   | 
442  |  |  * @param pattern           A pointer to UChar string specifying what characters are in the set.  | 
443  |  |  *                          The character at pattern[0] must be a '['.  | 
444  |  |  * @param patternLength     The length of the UChar string. -1 if NUL terminated.  | 
445  |  |  * @param options           A bitmask for options to apply to the pattern.  | 
446  |  |  *                          Valid options are USET_IGNORE_SPACE and  | 
447  |  |  *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,  | 
448  |  |  *                          USET_SIMPLE_CASE_INSENSITIVE.  | 
449  |  |  *                          These case options are mutually exclusive.  | 
450  |  |  * @param status            Returns an error if the pattern cannot be parsed.  | 
451  |  |  * @return                  Upon successful parse, the return value is  | 
452  |  |  *                          the index of the character after the closing ']'   | 
453  |  |  *                          of the parsed pattern.  | 
454  |  |  *                          If the status code indicates failure, then the return value   | 
455  |  |  *                          is the index of the error in the source.  | 
456  |  |  *  | 
457  |  |  * @stable ICU 2.8  | 
458  |  |  */  | 
459  |  | U_CAPI int32_t U_EXPORT2   | 
460  |  | uset_applyPattern(USet *set,  | 
461  |  |                   const UChar *pattern, int32_t patternLength,  | 
462  |  |                   uint32_t options,  | 
463  |  |                   UErrorCode *status);  | 
464  |  |  | 
465  |  | /**  | 
466  |  |  * Modifies the set to contain those code points which have the given value  | 
467  |  |  * for the given binary or enumerated property, as returned by  | 
468  |  |  * u_getIntPropertyValue.  Prior contents of this set are lost.  | 
469  |  |  * A frozen set will not be modified.  | 
470  |  |  *  | 
471  |  |  * @param set the object to contain the code points defined by the property  | 
472  |  |  *  | 
473  |  |  * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1  | 
474  |  |  * or UCHAR_INT_START..UCHAR_INT_LIMIT-1  | 
475  |  |  * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.  | 
476  |  |  *  | 
477  |  |  * @param value a value in the range u_getIntPropertyMinValue(prop)..  | 
478  |  |  * u_getIntPropertyMaxValue(prop), with one exception.  If prop is  | 
479  |  |  * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but  | 
480  |  |  * rather a mask value produced by U_GET_GC_MASK().  This allows grouped  | 
481  |  |  * categories such as [:L:] to be represented.  | 
482  |  |  *  | 
483  |  |  * @param ec error code input/output parameter  | 
484  |  |  *  | 
485  |  |  * @stable ICU 3.2  | 
486  |  |  */  | 
487  |  | U_CAPI void U_EXPORT2  | 
488  |  | uset_applyIntPropertyValue(USet* set,  | 
489  |  |                            UProperty prop, int32_t value, UErrorCode* ec);  | 
490  |  |  | 
491  |  | /**  | 
492  |  |  * Modifies the set to contain those code points which have the  | 
493  |  |  * given value for the given property.  Prior contents of this  | 
494  |  |  * set are lost.  | 
495  |  |  * A frozen set will not be modified.  | 
496  |  |  *  | 
497  |  |  * @param set the object to contain the code points defined by the given  | 
498  |  |  * property and value alias  | 
499  |  |  *  | 
500  |  |  * @param prop a string specifying a property alias, either short or long.  | 
501  |  |  * The name is matched loosely.  See PropertyAliases.txt for names and a  | 
502  |  |  * description of loose matching.  If the value string is empty, then this  | 
503  |  |  * string is interpreted as either a General_Category value alias, a Script  | 
504  |  |  * value alias, a binary property alias, or a special ID.  Special IDs are  | 
505  |  |  * matched loosely and correspond to the following sets:  | 
506  |  |  *  | 
507  |  |  * "ANY" = [\\u0000-\\U0010FFFF],  | 
508  |  |  * "ASCII" = [\\u0000-\\u007F],  | 
509  |  |  * "Assigned" = [:^Cn:].  | 
510  |  |  *  | 
511  |  |  * @param propLength the length of the prop, or -1 if NUL-terminated  | 
512  |  |  *  | 
513  |  |  * @param value a string specifying a value alias, either short or long.  | 
514  |  |  * The name is matched loosely.  See PropertyValueAliases.txt for names  | 
515  |  |  * and a description of loose matching.  In addition to aliases listed,  | 
516  |  |  * numeric values and canonical combining classes may be expressed  | 
517  |  |  * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string | 
518  |  |  * may also be empty.  | 
519  |  |  *  | 
520  |  |  * @param valueLength the length of the value, or -1 if NUL-terminated  | 
521  |  |  *  | 
522  |  |  * @param ec error code input/output parameter  | 
523  |  |  *  | 
524  |  |  * @stable ICU 3.2  | 
525  |  |  */  | 
526  |  | U_CAPI void U_EXPORT2  | 
527  |  | uset_applyPropertyAlias(USet* set,  | 
528  |  |                         const UChar *prop, int32_t propLength,  | 
529  |  |                         const UChar *value, int32_t valueLength,  | 
530  |  |                         UErrorCode* ec);  | 
531  |  |  | 
532  |  | /**  | 
533  |  |  * Return true if the given position, in the given pattern, appears  | 
534  |  |  * to be the start of a UnicodeSet pattern.  | 
535  |  |  *  | 
536  |  |  * @param pattern a string specifying the pattern  | 
537  |  |  * @param patternLength the length of the pattern, or -1 if NUL-terminated  | 
538  |  |  * @param pos the given position  | 
539  |  |  * @stable ICU 3.2  | 
540  |  |  */  | 
541  |  | U_CAPI UBool U_EXPORT2  | 
542  |  | uset_resemblesPattern(const UChar *pattern, int32_t patternLength,  | 
543  |  |                       int32_t pos);  | 
544  |  |  | 
545  |  | /**  | 
546  |  |  * Returns a string representation of this set.  If the result of  | 
547  |  |  * calling this function is passed to a uset_openPattern(), it  | 
548  |  |  * will produce another set that is equal to this one.  | 
549  |  |  * @param set the set  | 
550  |  |  * @param result the string to receive the rules, may be NULL  | 
551  |  |  * @param resultCapacity the capacity of result, may be 0 if result is NULL  | 
552  |  |  * @param escapeUnprintable if true then convert unprintable  | 
553  |  |  * characters to their hex escape representations, \\uxxxx or  | 
554  |  |  * \\Uxxxxxxxx.  Unprintable characters are those other than  | 
555  |  |  * U+000A, U+0020..U+007E.  | 
556  |  |  * @param ec error code.  | 
557  |  |  * @return length of string, possibly larger than resultCapacity  | 
558  |  |  * @stable ICU 2.4  | 
559  |  |  */  | 
560  |  | U_CAPI int32_t U_EXPORT2  | 
561  |  | uset_toPattern(const USet* set,  | 
562  |  |                UChar* result, int32_t resultCapacity,  | 
563  |  |                UBool escapeUnprintable,  | 
564  |  |                UErrorCode* ec);  | 
565  |  |  | 
566  |  | /**  | 
567  |  |  * Adds the given character to the given USet.  After this call,  | 
568  |  |  * uset_contains(set, c) will return true.  | 
569  |  |  * A frozen set will not be modified.  | 
570  |  |  * @param set the object to which to add the character  | 
571  |  |  * @param c the character to add  | 
572  |  |  * @stable ICU 2.4  | 
573  |  |  */  | 
574  |  | U_CAPI void U_EXPORT2  | 
575  |  | uset_add(USet* set, UChar32 c);  | 
576  |  |  | 
577  |  | /**  | 
578  |  |  * Adds all of the elements in the specified set to this set if  | 
579  |  |  * they're not already present.  This operation effectively  | 
580  |  |  * modifies this set so that its value is the <i>union</i> of the two  | 
581  |  |  * sets.  The behavior of this operation is unspecified if the specified  | 
582  |  |  * collection is modified while the operation is in progress.  | 
583  |  |  * A frozen set will not be modified.  | 
584  |  |  *  | 
585  |  |  * @param set the object to which to add the set  | 
586  |  |  * @param additionalSet the source set whose elements are to be added to this set.  | 
587  |  |  * @stable ICU 2.6  | 
588  |  |  */  | 
589  |  | U_CAPI void U_EXPORT2  | 
590  |  | uset_addAll(USet* set, const USet *additionalSet);  | 
591  |  |  | 
592  |  | /**  | 
593  |  |  * Adds the given range of characters to the given USet.  After this call,  | 
594  |  |  * uset_contains(set, c) will return true for each c in start..end.  | 
595  |  |  * A frozen set will not be modified.  | 
596  |  |  * @param set the object to which to add the range  | 
597  |  |  * @param start the first character of the range to add, inclusive  | 
598  |  |  * @param end the last character of the range to add, inclusive  | 
599  |  |  * @stable ICU 2.2  | 
600  |  |  */  | 
601  |  | U_CAPI void U_EXPORT2  | 
602  |  | uset_addRange(USet* set, UChar32 start, UChar32 end);  | 
603  |  |  | 
604  |  | /**  | 
605  |  |  * Adds the given string to the given USet.  After this call,  | 
606  |  |  * uset_containsString(set, str, strLen) will return true.  | 
607  |  |  * A frozen set will not be modified.  | 
608  |  |  * @param set the object to which to add the string  | 
609  |  |  * @param str the string to add  | 
610  |  |  * @param strLen the length of the string or -1 if null terminated.  | 
611  |  |  * @stable ICU 2.4  | 
612  |  |  */  | 
613  |  | U_CAPI void U_EXPORT2  | 
614  |  | uset_addString(USet* set, const UChar* str, int32_t strLen);  | 
615  |  |  | 
616  |  | /**  | 
617  |  |  * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"} | 
618  |  |  * If this set already contains any particular character, it has no effect on that character.  | 
619  |  |  * A frozen set will not be modified.  | 
620  |  |  * @param set the object to which to add the characters  | 
621  |  |  * @param str the source string  | 
622  |  |  * @param strLen the length of the string or -1 if null terminated.  | 
623  |  |  * @stable ICU 3.4  | 
624  |  |  */  | 
625  |  | U_CAPI void U_EXPORT2  | 
626  |  | uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);  | 
627  |  |  | 
628  |  | /**  | 
629  |  |  * Removes the given character from the given USet.  After this call,  | 
630  |  |  * uset_contains(set, c) will return false.  | 
631  |  |  * A frozen set will not be modified.  | 
632  |  |  * @param set the object from which to remove the character  | 
633  |  |  * @param c the character to remove  | 
634  |  |  * @stable ICU 2.4  | 
635  |  |  */  | 
636  |  | U_CAPI void U_EXPORT2  | 
637  |  | uset_remove(USet* set, UChar32 c);  | 
638  |  |  | 
639  |  | /**  | 
640  |  |  * Removes the given range of characters from the given USet.  After this call,  | 
641  |  |  * uset_contains(set, c) will return false for every c in start..end.  | 
642  |  |  * A frozen set will not be modified.  | 
643  |  |  * @param set the object from which to remove the range  | 
644  |  |  * @param start the first character of the range to remove, inclusive  | 
645  |  |  * @param end the last character of the range to remove, inclusive  | 
646  |  |  * @stable ICU 2.2  | 
647  |  |  */  | 
648  |  | U_CAPI void U_EXPORT2  | 
649  |  | uset_removeRange(USet* set, UChar32 start, UChar32 end);  | 
650  |  |  | 
651  |  | /**  | 
652  |  |  * Removes the given string from the given USet.  After this call,  | 
653  |  |  * uset_containsString(set, str, strLen) will return false.  | 
654  |  |  * A frozen set will not be modified.  | 
655  |  |  * @param set the object from which to remove the string  | 
656  |  |  * @param str the string to remove  | 
657  |  |  * @param strLen the length of the string or -1 if null terminated.  | 
658  |  |  * @stable ICU 2.4  | 
659  |  |  */  | 
660  |  | U_CAPI void U_EXPORT2  | 
661  |  | uset_removeString(USet* set, const UChar* str, int32_t strLen);  | 
662  |  |  | 
663  |  | /**  | 
664  |  |  * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"} | 
665  |  |  * A frozen set will not be modified.  | 
666  |  |  *  | 
667  |  |  * @param set the object to be modified  | 
668  |  |  * @param str the string  | 
669  |  |  * @param length the length of the string, or -1 if NUL-terminated  | 
670  |  |  * @stable ICU 69  | 
671  |  |  */  | 
672  |  | U_CAPI void U_EXPORT2  | 
673  |  | uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);  | 
674  |  |  | 
675  |  | /**  | 
676  |  |  * Removes from this set all of its elements that are contained in the  | 
677  |  |  * specified set.  This operation effectively modifies this  | 
678  |  |  * set so that its value is the <i>asymmetric set difference</i> of  | 
679  |  |  * the two sets.  | 
680  |  |  * A frozen set will not be modified.  | 
681  |  |  * @param set the object from which the elements are to be removed  | 
682  |  |  * @param removeSet the object that defines which elements will be  | 
683  |  |  * removed from this set  | 
684  |  |  * @stable ICU 3.2  | 
685  |  |  */  | 
686  |  | U_CAPI void U_EXPORT2  | 
687  |  | uset_removeAll(USet* set, const USet* removeSet);  | 
688  |  |  | 
689  |  | /**  | 
690  |  |  * Retain only the elements in this set that are contained in the  | 
691  |  |  * specified range.  If <code>start > end</code> then an empty range is  | 
692  |  |  * retained, leaving the set empty.  This is equivalent to  | 
693  |  |  * a boolean logic AND, or a set INTERSECTION.  | 
694  |  |  * A frozen set will not be modified.  | 
695  |  |  *  | 
696  |  |  * @param set the object for which to retain only the specified range  | 
697  |  |  * @param start first character, inclusive, of range  | 
698  |  |  * @param end last character, inclusive, of range  | 
699  |  |  * @stable ICU 3.2  | 
700  |  |  */  | 
701  |  | U_CAPI void U_EXPORT2  | 
702  |  | uset_retain(USet* set, UChar32 start, UChar32 end);  | 
703  |  |  | 
704  |  | /**  | 
705  |  |  * Retains only the specified string from this set if it is present.  | 
706  |  |  * Upon return this set will be empty if it did not contain s, or  | 
707  |  |  * will only contain s if it did contain s.  | 
708  |  |  * A frozen set will not be modified.  | 
709  |  |  *  | 
710  |  |  * @param set the object to be modified  | 
711  |  |  * @param str the string  | 
712  |  |  * @param length the length of the string, or -1 if NUL-terminated  | 
713  |  |  * @stable ICU 69  | 
714  |  |  */  | 
715  |  | U_CAPI void U_EXPORT2  | 
716  |  | uset_retainString(USet *set, const UChar *str, int32_t length);  | 
717  |  |  | 
718  |  | /**  | 
719  |  |  * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} | 
720  |  |  * A frozen set will not be modified.  | 
721  |  |  *  | 
722  |  |  * @param set the object to be modified  | 
723  |  |  * @param str the string  | 
724  |  |  * @param length the length of the string, or -1 if NUL-terminated  | 
725  |  |  * @stable ICU 69  | 
726  |  |  */  | 
727  |  | U_CAPI void U_EXPORT2  | 
728  |  | uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);  | 
729  |  |  | 
730  |  | /**  | 
731  |  |  * Retains only the elements in this set that are contained in the  | 
732  |  |  * specified set.  In other words, removes from this set all of  | 
733  |  |  * its elements that are not contained in the specified set.  This  | 
734  |  |  * operation effectively modifies this set so that its value is  | 
735  |  |  * the <i>intersection</i> of the two sets.  | 
736  |  |  * A frozen set will not be modified.  | 
737  |  |  *  | 
738  |  |  * @param set the object on which to perform the retain  | 
739  |  |  * @param retain set that defines which elements this set will retain  | 
740  |  |  * @stable ICU 3.2  | 
741  |  |  */  | 
742  |  | U_CAPI void U_EXPORT2  | 
743  |  | uset_retainAll(USet* set, const USet* retain);  | 
744  |  |  | 
745  |  | /**  | 
746  |  |  * Reallocate this object's internal structures to take up the least  | 
747  |  |  * possible space, without changing this object's value.  | 
748  |  |  * A frozen set will not be modified.  | 
749  |  |  *  | 
750  |  |  * @param set the object on which to perform the compact  | 
751  |  |  * @stable ICU 3.2  | 
752  |  |  */  | 
753  |  | U_CAPI void U_EXPORT2  | 
754  |  | uset_compact(USet* set);  | 
755  |  |  | 
756  |  | /**  | 
757  |  |  * This is equivalent to  | 
758  |  |  * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.  | 
759  |  |  *  | 
760  |  |  * <strong>Note:</strong> This performs a symmetric difference with all code points  | 
761  |  |  * <em>and thus retains all multicharacter strings</em>.  | 
762  |  |  * In order to achieve a “code point complement” (all code points minus this set),  | 
763  |  |  * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.  | 
764  |  |  *  | 
765  |  |  * A frozen set will not be modified.  | 
766  |  |  * @param set the set  | 
767  |  |  * @stable ICU 2.4  | 
768  |  |  */  | 
769  |  | U_CAPI void U_EXPORT2  | 
770  |  | uset_complement(USet* set);  | 
771  |  |  | 
772  |  | /**  | 
773  |  |  * Complements the specified range in this set.  Any character in  | 
774  |  |  * the range will be removed if it is in this set, or will be  | 
775  |  |  * added if it is not in this set.  If <code>start > end</code>  | 
776  |  |  * then an empty range is complemented, leaving the set unchanged.  | 
777  |  |  * This is equivalent to a boolean logic XOR.  | 
778  |  |  * A frozen set will not be modified.  | 
779  |  |  *  | 
780  |  |  * @param set the object to be modified  | 
781  |  |  * @param start first character, inclusive, of range  | 
782  |  |  * @param end last character, inclusive, of range  | 
783  |  |  * @stable ICU 69  | 
784  |  |  */  | 
785  |  | U_CAPI void U_EXPORT2  | 
786  |  | uset_complementRange(USet *set, UChar32 start, UChar32 end);  | 
787  |  |  | 
788  |  | /**  | 
789  |  |  * Complements the specified string in this set.  | 
790  |  |  * The string will be removed if it is in this set, or will be added if it is not in this set.  | 
791  |  |  * A frozen set will not be modified.  | 
792  |  |  *  | 
793  |  |  * @param set the object to be modified  | 
794  |  |  * @param str the string  | 
795  |  |  * @param length the length of the string, or -1 if NUL-terminated  | 
796  |  |  * @stable ICU 69  | 
797  |  |  */  | 
798  |  | U_CAPI void U_EXPORT2  | 
799  |  | uset_complementString(USet *set, const UChar *str, int32_t length);  | 
800  |  |  | 
801  |  | /**  | 
802  |  |  * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"} | 
803  |  |  * A frozen set will not be modified.  | 
804  |  |  *  | 
805  |  |  * @param set the object to be modified  | 
806  |  |  * @param str the string  | 
807  |  |  * @param length the length of the string, or -1 if NUL-terminated  | 
808  |  |  * @stable ICU 69  | 
809  |  |  */  | 
810  |  | U_CAPI void U_EXPORT2  | 
811  |  | uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);  | 
812  |  |  | 
813  |  | /**  | 
814  |  |  * Complements in this set all elements contained in the specified  | 
815  |  |  * set.  Any character in the other set will be removed if it is  | 
816  |  |  * in this set, or will be added if it is not in this set.  | 
817  |  |  * A frozen set will not be modified.  | 
818  |  |  *  | 
819  |  |  * @param set the set to be modified  | 
820  |  |  * @param complement set that defines which elements will be xor'ed  | 
821  |  |  * with this set.  | 
822  |  |  * @stable ICU 3.2  | 
823  |  |  */  | 
824  |  | U_CAPI void U_EXPORT2  | 
825  |  | uset_complementAll(USet* set, const USet* complement);  | 
826  |  |  | 
827  |  | /**  | 
828  |  |  * Removes all of the elements from this set.  This set will be  | 
829  |  |  * empty after this call returns.  | 
830  |  |  * A frozen set will not be modified.  | 
831  |  |  * @param set the set  | 
832  |  |  * @stable ICU 2.4  | 
833  |  |  */  | 
834  |  | U_CAPI void U_EXPORT2  | 
835  |  | uset_clear(USet* set);  | 
836  |  |  | 
837  |  | /**  | 
838  |  |  * Close this set over the given attribute.  For the attribute  | 
839  |  |  * USET_CASE_INSENSITIVE, the result is to modify this set so that:  | 
840  |  |  *  | 
841  |  |  * 1. For each character or string 'a' in this set, all strings or  | 
842  |  |  * characters 'b' such that foldCase(a) == foldCase(b) are added  | 
843  |  |  * to this set.  | 
844  |  |  *  | 
845  |  |  * 2. For each string 'e' in the resulting set, if e !=  | 
846  |  |  * foldCase(e), 'e' will be removed.  | 
847  |  |  *  | 
848  |  |  * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] | 
849  |  |  *  | 
850  |  |  * (Here foldCase(x) refers to the operation u_strFoldCase, and a  | 
851  |  |  * == b denotes that the contents are the same, not pointer  | 
852  |  |  * comparison.)  | 
853  |  |  *  | 
854  |  |  * A frozen set will not be modified.  | 
855  |  |  *  | 
856  |  |  * @param set the set  | 
857  |  |  *  | 
858  |  |  * @param attributes bitmask for attributes to close over.  | 
859  |  |  * Valid options:  | 
860  |  |  * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.  | 
861  |  |  * These case options are mutually exclusive.  | 
862  |  |  * Unrelated options bits are ignored.  | 
863  |  |  * @stable ICU 4.2  | 
864  |  |  */  | 
865  |  | U_CAPI void U_EXPORT2  | 
866  |  | uset_closeOver(USet* set, int32_t attributes);  | 
867  |  |  | 
868  |  | /**  | 
869  |  |  * Remove all strings from this set.  | 
870  |  |  *  | 
871  |  |  * @param set the set  | 
872  |  |  * @stable ICU 4.2  | 
873  |  |  */  | 
874  |  | U_CAPI void U_EXPORT2  | 
875  |  | uset_removeAllStrings(USet* set);  | 
876  |  |  | 
877  |  | /**  | 
878  |  |  * Returns true if the given USet contains no characters and no  | 
879  |  |  * strings.  | 
880  |  |  * @param set the set  | 
881  |  |  * @return true if set is empty  | 
882  |  |  * @stable ICU 2.4  | 
883  |  |  */  | 
884  |  | U_CAPI UBool U_EXPORT2  | 
885  |  | uset_isEmpty(const USet* set);  | 
886  |  |  | 
887  |  | /**  | 
888  |  |  * @param set the set  | 
889  |  |  * @return true if this set contains multi-character strings or the empty string.  | 
890  |  |  * @stable ICU 70  | 
891  |  |  */  | 
892  |  | U_CAPI UBool U_EXPORT2  | 
893  |  | uset_hasStrings(const USet *set);  | 
894  |  |  | 
895  |  | /**  | 
896  |  |  * Returns true if the given USet contains the given character.  | 
897  |  |  * This function works faster with a frozen set.  | 
898  |  |  * @param set the set  | 
899  |  |  * @param c The codepoint to check for within the set  | 
900  |  |  * @return true if set contains c  | 
901  |  |  * @stable ICU 2.4  | 
902  |  |  */  | 
903  |  | U_CAPI UBool U_EXPORT2  | 
904  |  | uset_contains(const USet* set, UChar32 c);  | 
905  |  |  | 
906  |  | /**  | 
907  |  |  * Returns true if the given USet contains all characters c  | 
908  |  |  * where start <= c && c <= end.  | 
909  |  |  * @param set the set  | 
910  |  |  * @param start the first character of the range to test, inclusive  | 
911  |  |  * @param end the last character of the range to test, inclusive  | 
912  |  |  * @return true if set contains the range  | 
913  |  |  * @stable ICU 2.2  | 
914  |  |  */  | 
915  |  | U_CAPI UBool U_EXPORT2  | 
916  |  | uset_containsRange(const USet* set, UChar32 start, UChar32 end);  | 
917  |  |  | 
918  |  | /**  | 
919  |  |  * Returns true if the given USet contains the given string.  | 
920  |  |  * @param set the set  | 
921  |  |  * @param str the string  | 
922  |  |  * @param strLen the length of the string or -1 if null terminated.  | 
923  |  |  * @return true if set contains str  | 
924  |  |  * @stable ICU 2.4  | 
925  |  |  */  | 
926  |  | U_CAPI UBool U_EXPORT2  | 
927  |  | uset_containsString(const USet* set, const UChar* str, int32_t strLen);  | 
928  |  |  | 
929  |  | /**  | 
930  |  |  * Returns the index of the given character within this set, where  | 
931  |  |  * the set is ordered by ascending code point.  If the character  | 
932  |  |  * is not in this set, return -1.  The inverse of this function is  | 
933  |  |  * <code>uset_charAt()</code>.  | 
934  |  |  * @param set the set  | 
935  |  |  * @param c the character to obtain the index for  | 
936  |  |  * @return an index from 0..uset_size(set)-1, or -1  | 
937  |  |  * @stable ICU 3.2  | 
938  |  |  */  | 
939  |  | U_CAPI int32_t U_EXPORT2  | 
940  |  | uset_indexOf(const USet* set, UChar32 c);  | 
941  |  |  | 
942  |  | /**  | 
943  |  |  * Returns the character at the given index within this set, where  | 
944  |  |  * the set is ordered by ascending code point.  If the index is  | 
945  |  |  * out of range for characters, returns (UChar32)-1.  | 
946  |  |  * The inverse of this function is <code>uset_indexOf()</code>.  | 
947  |  |  *  | 
948  |  |  * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()  | 
949  |  |  * with uset_getItem(), because for each call it skips linearly over <code>charIndex</code>  | 
950  |  |  * characters in the ranges.  | 
951  |  |  *  | 
952  |  |  * @param set the set  | 
953  |  |  * @param charIndex an index from 0..uset_size(set)-1 to obtain the char for  | 
954  |  |  * @return the character at the given index, or (UChar32)-1.  | 
955  |  |  * @stable ICU 3.2  | 
956  |  |  */  | 
957  |  | U_CAPI UChar32 U_EXPORT2  | 
958  |  | uset_charAt(const USet* set, int32_t charIndex);  | 
959  |  |  | 
960  |  | /**  | 
961  |  |  * Returns the number of characters and strings contained in this set.  | 
962  |  |  * The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings.  | 
963  |  |  *  | 
964  |  |  * This is slower than uset_getRangeCount() and uset_getItemCount() because  | 
965  |  |  * it counts the code points of all ranges.  | 
966  |  |  *  | 
967  |  |  * @param set the set  | 
968  |  |  * @return a non-negative integer counting the characters and strings  | 
969  |  |  * contained in set  | 
970  |  |  * @stable ICU 2.4  | 
971  |  |  * @see uset_getRangeCount  | 
972  |  |  * @see uset_getStringCount  | 
973  |  |  * @see uset_getItemCount  | 
974  |  |  */  | 
975  |  | U_CAPI int32_t U_EXPORT2  | 
976  |  | uset_size(const USet* set);  | 
977  |  |  | 
978  |  | /**  | 
979  |  |  * @param set the set  | 
980  |  |  * @return the number of ranges in this set.  | 
981  |  |  * @stable ICU 70  | 
982  |  |  * @see uset_getItemCount  | 
983  |  |  * @see uset_getItem  | 
984  |  |  * @see uset_getStringCount  | 
985  |  |  * @see uset_size  | 
986  |  |  */  | 
987  |  | U_CAPI int32_t U_EXPORT2  | 
988  |  | uset_getRangeCount(const USet *set);  | 
989  |  |  | 
990  |  | #ifndef U_HIDE_DRAFT_API  | 
991  |  |  | 
992  |  | /**  | 
993  |  |  * @param set the set  | 
994  |  |  * @return the number of strings in this set.  | 
995  |  |  * @draft ICU 76  | 
996  |  |  * @see uset_getRangeCount  | 
997  |  |  * @see uset_getItemCount  | 
998  |  |  * @see uset_size  | 
999  |  |  */  | 
1000  |  | U_CAPI int32_t U_EXPORT2  | 
1001  |  | uset_getStringCount(const USet *set);  | 
1002  |  |  | 
1003  |  | /**  | 
1004  |  |  * Returns the index-th string (empty or multi-character) in the set.  | 
1005  |  |  * The string may not be NUL-terminated.  | 
1006  |  |  * The output length must be used, and the caller must not read more than that many UChars.  | 
1007  |  |  *  | 
1008  |  |  * @param set the set  | 
1009  |  |  * @param index the string index, 0 .. uset_getStringCount() - 1  | 
1010  |  |  * @param pLength the output string length; must not be NULL  | 
1011  |  |  * @return the pointer to the string; NULL if the index is out of range or pLength is NULL  | 
1012  |  |  * @draft ICU 76  | 
1013  |  |  * @see uset_getStringCount  | 
1014  |  |  */  | 
1015  |  | U_CAPI const UChar* U_EXPORT2  | 
1016  |  | uset_getString(const USet *set, int32_t index, int32_t *pLength);  | 
1017  |  |  | 
1018  |  | #endif  // U_HIDE_DRAFT_API  | 
1019  |  |  | 
1020  |  | /**  | 
1021  |  |  * Returns the number of items in this set.  An item is either a range  | 
1022  |  |  * of characters or a single multicharacter string (which can be the empty string).  | 
1023  |  |  * @param set the set  | 
1024  |  |  * @return a non-negative integer counting the character ranges  | 
1025  |  |  * and/or strings contained in set  | 
1026  |  |  * @stable ICU 2.4  | 
1027  |  |  * @see uset_getRangeCount  | 
1028  |  |  * @see uset_getStringCount  | 
1029  |  |  */  | 
1030  |  | U_CAPI int32_t U_EXPORT2  | 
1031  |  | uset_getItemCount(const USet* set);  | 
1032  |  |  | 
1033  |  | /**  | 
1034  |  |  * Returns an item of this set.  An item is either a range of  | 
1035  |  |  * characters or a single multicharacter string (which can be the empty string).  | 
1036  |  |  *  | 
1037  |  |  * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,  | 
1038  |  |  * and the range is <code>*start</code>..<code>*end</code>.  | 
1039  |  |  *  | 
1040  |  |  * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then  | 
1041  |  |  * this function copies the string into <code>str[strCapacity]</code> and  | 
1042  |  |  * returns the length of the string (0 for the empty string).  | 
1043  |  |  * See uset_getString() for a function that does not copy the string contents.  | 
1044  |  |  *  | 
1045  |  |  * If <code>itemIndex</code> is out of range, then this function returns -1.  | 
1046  |  |  *  | 
1047  |  |  * Note that 0 is returned for each range as well as for the empty string.  | 
1048  |  |  *  | 
1049  |  |  * @param set the set  | 
1050  |  |  * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1  | 
1051  |  |  * @param start pointer to variable to receive first character in range, inclusive;  | 
1052  |  |  *              can be NULL for a string item  | 
1053  |  |  * @param end pointer to variable to receive last character in range, inclusive;  | 
1054  |  |  *            can be NULL for a string item  | 
1055  |  |  * @param str buffer to receive the string, may be NULL  | 
1056  |  |  * @param strCapacity capacity of str, or 0 if str is NULL  | 
1057  |  |  * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range  | 
1058  |  |  * @return the length of the string (0 or >= 2), or 0 if the item is a range,  | 
1059  |  |  *         or -1 if the itemIndex is out of range  | 
1060  |  |  * @stable ICU 2.4  | 
1061  |  |  * @see uset_getString  | 
1062  |  |  */  | 
1063  |  | U_CAPI int32_t U_EXPORT2  | 
1064  |  | uset_getItem(const USet* set, int32_t itemIndex,  | 
1065  |  |              UChar32* start, UChar32* end,  | 
1066  |  |              UChar* str, int32_t strCapacity,  | 
1067  |  |              UErrorCode* ec);  | 
1068  |  |  | 
1069  |  | /**  | 
1070  |  |  * Returns true if set1 contains all the characters and strings  | 
1071  |  |  * of set2. It answers the question, 'Is set1 a superset of set2?'  | 
1072  |  |  * @param set1 the candidate superset  | 
1073  |  |  * @param set2 the set whose characters and strings are looked up in set1  | 
1074  |  |  * @return true if the test condition is met  | 
1075  |  |  * @stable ICU 3.2  | 
1076  |  |  */  | 
1077  |  | U_CAPI UBool U_EXPORT2  | 
1078  |  | uset_containsAll(const USet* set1, const USet* set2);  | 
1079  |  |  | 
1080  |  | /**  | 
1081  |  |  * Returns true if this set contains all the characters  | 
1082  |  |  * of the given string. This does not check containment of grapheme  | 
1083  |  |  * clusters, like uset_containsString.  | 
1084  |  |  * @param set set of characters to be checked for containment  | 
1085  |  |  * @param str string containing codepoints to be checked for containment  | 
1086  |  |  * @param strLen the length of the string or -1 if null terminated.  | 
1087  |  |  * @return true if the test condition is met  | 
1088  |  |  * @stable ICU 3.4  | 
1089  |  |  */  | 
1090  |  | U_CAPI UBool U_EXPORT2  | 
1091  |  | uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);  | 
1092  |  |  | 
1093  |  | /**  | 
1094  |  |  * Returns true if set1 contains none of the characters and strings  | 
1095  |  |  * of set2. It answers the question, 'Are set1 and set2 disjoint?'  | 
1096  |  |  * @param set1 the set to search in  | 
1097  |  |  * @param set2 the set whose characters and strings are looked up in set1  | 
1098  |  |  * @return true if the test condition is met  | 
1099  |  |  * @stable ICU 3.2  | 
1100  |  |  */  | 
1101  |  | U_CAPI UBool U_EXPORT2  | 
1102  |  | uset_containsNone(const USet* set1, const USet* set2);  | 
1103  |  |  | 
1104  |  | /**  | 
1105  |  |  * Returns true if set1 contains some of the characters and strings  | 
1106  |  |  * of set2. It answers the question, 'Do set1 and set2 have a non-empty intersection?'  | 
1107  |  |  * @param set1 the set to search in  | 
1108  |  |  * @param set2 the set whose characters and strings are looked up in set1  | 
1109  |  |  * @return true if the test condition is met  | 
1110  |  |  * @stable ICU 3.2  | 
1111  |  |  */  | 
1112  |  | U_CAPI UBool U_EXPORT2  | 
1113  |  | uset_containsSome(const USet* set1, const USet* set2);  | 
1114  |  |  | 
1115  |  | /**  | 
1116  |  |  * Returns the length of the initial substring of the input string which  | 
1117  |  |  * consists only of characters and strings that are contained in this set  | 
1118  |  |  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),  | 
1119  |  |  * or only of characters and strings that are not contained  | 
1120  |  |  * in this set (USET_SPAN_NOT_CONTAINED).  | 
1121  |  |  * See USetSpanCondition for details.  | 
1122  |  |  * Similar to the strspn() C library function.  | 
1123  |  |  * Unpaired surrogates are treated according to contains() of their surrogate code points.  | 
1124  |  |  * This function works faster with a frozen set and with a non-negative string length argument.  | 
1125  |  |  * @param set the set  | 
1126  |  |  * @param s start of the string  | 
1127  |  |  * @param length of the string; can be -1 for NUL-terminated  | 
1128  |  |  * @param spanCondition specifies the containment condition  | 
1129  |  |  * @return the length of the initial substring according to the spanCondition;  | 
1130  |  |  *         0 if the start of the string does not fit the spanCondition  | 
1131  |  |  * @stable ICU 3.8  | 
1132  |  |  * @see USetSpanCondition  | 
1133  |  |  */  | 
1134  |  | U_CAPI int32_t U_EXPORT2  | 
1135  |  | uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);  | 
1136  |  |  | 
1137  |  | /**  | 
1138  |  |  * Returns the start of the trailing substring of the input string which  | 
1139  |  |  * consists only of characters and strings that are contained in this set  | 
1140  |  |  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),  | 
1141  |  |  * or only of characters and strings that are not contained  | 
1142  |  |  * in this set (USET_SPAN_NOT_CONTAINED).  | 
1143  |  |  * See USetSpanCondition for details.  | 
1144  |  |  * Unpaired surrogates are treated according to contains() of their surrogate code points.  | 
1145  |  |  * This function works faster with a frozen set and with a non-negative string length argument.  | 
1146  |  |  * @param set the set  | 
1147  |  |  * @param s start of the string  | 
1148  |  |  * @param length of the string; can be -1 for NUL-terminated  | 
1149  |  |  * @param spanCondition specifies the containment condition  | 
1150  |  |  * @return the start of the trailing substring according to the spanCondition;  | 
1151  |  |  *         the string length if the end of the string does not fit the spanCondition  | 
1152  |  |  * @stable ICU 3.8  | 
1153  |  |  * @see USetSpanCondition  | 
1154  |  |  */  | 
1155  |  | U_CAPI int32_t U_EXPORT2  | 
1156  |  | uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);  | 
1157  |  |  | 
1158  |  | /**  | 
1159  |  |  * Returns the length of the initial substring of the input string which  | 
1160  |  |  * consists only of characters and strings that are contained in this set  | 
1161  |  |  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),  | 
1162  |  |  * or only of characters and strings that are not contained  | 
1163  |  |  * in this set (USET_SPAN_NOT_CONTAINED).  | 
1164  |  |  * See USetSpanCondition for details.  | 
1165  |  |  * Similar to the strspn() C library function.  | 
1166  |  |  * Malformed byte sequences are treated according to contains(0xfffd).  | 
1167  |  |  * This function works faster with a frozen set and with a non-negative string length argument.  | 
1168  |  |  * @param set the set  | 
1169  |  |  * @param s start of the string (UTF-8)  | 
1170  |  |  * @param length of the string; can be -1 for NUL-terminated  | 
1171  |  |  * @param spanCondition specifies the containment condition  | 
1172  |  |  * @return the length of the initial substring according to the spanCondition;  | 
1173  |  |  *         0 if the start of the string does not fit the spanCondition  | 
1174  |  |  * @stable ICU 3.8  | 
1175  |  |  * @see USetSpanCondition  | 
1176  |  |  */  | 
1177  |  | U_CAPI int32_t U_EXPORT2  | 
1178  |  | uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);  | 
1179  |  |  | 
1180  |  | /**  | 
1181  |  |  * Returns the start of the trailing substring of the input string which  | 
1182  |  |  * consists only of characters and strings that are contained in this set  | 
1183  |  |  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),  | 
1184  |  |  * or only of characters and strings that are not contained  | 
1185  |  |  * in this set (USET_SPAN_NOT_CONTAINED).  | 
1186  |  |  * See USetSpanCondition for details.  | 
1187  |  |  * Malformed byte sequences are treated according to contains(0xfffd).  | 
1188  |  |  * This function works faster with a frozen set and with a non-negative string length argument.  | 
1189  |  |  * @param set the set  | 
1190  |  |  * @param s start of the string (UTF-8)  | 
1191  |  |  * @param length of the string; can be -1 for NUL-terminated  | 
1192  |  |  * @param spanCondition specifies the containment condition  | 
1193  |  |  * @return the start of the trailing substring according to the spanCondition;  | 
1194  |  |  *         the string length if the end of the string does not fit the spanCondition  | 
1195  |  |  * @stable ICU 3.8  | 
1196  |  |  * @see USetSpanCondition  | 
1197  |  |  */  | 
1198  |  | U_CAPI int32_t U_EXPORT2  | 
1199  |  | uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);  | 
1200  |  |  | 
1201  |  | /**  | 
1202  |  |  * Returns true if set1 contains all of the characters and strings  | 
1203  |  |  * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'  | 
1204  |  |  * @param set1 the first set to compare  | 
1205  |  |  * @param set2 the second set to compare  | 
1206  |  |  * @return true if the test condition is met  | 
1207  |  |  * @stable ICU 3.2  | 
1208  |  |  */  | 
1209  |  | U_CAPI UBool U_EXPORT2  | 
1210  |  | uset_equals(const USet* set1, const USet* set2);  | 
1211  |  |  | 
1212  |  | /*********************************************************************  | 
1213  |  |  * Serialized set API  | 
1214  |  |  *********************************************************************/  | 
1215  |  |  | 
1216  |  | /**  | 
1217  |  |  * Serializes this set into an array of 16-bit integers.  Serialization  | 
1218  |  |  * (currently) only records the characters in the set; multicharacter  | 
1219  |  |  * strings are ignored.  | 
1220  |  |  *  | 
1221  |  |  * The array  | 
1222  |  |  * has the following format (each line is one 16-bit integer):  | 
1223  |  |  *  | 
1224  |  |  *  length     = (n+2*m) | (m!=0?0x8000:0)  | 
1225  |  |  *  bmpLength  = n; present if m!=0  | 
1226  |  |  *  bmp[0]  | 
1227  |  |  *  bmp[1]  | 
1228  |  |  *  ...  | 
1229  |  |  *  bmp[n-1]  | 
1230  |  |  *  supp-high[0]  | 
1231  |  |  *  supp-low[0]  | 
1232  |  |  *  supp-high[1]  | 
1233  |  |  *  supp-low[1]  | 
1234  |  |  *  ...  | 
1235  |  |  *  supp-high[m-1]  | 
1236  |  |  *  supp-low[m-1]  | 
1237  |  |  *  | 
1238  |  |  * The array starts with a header.  After the header are n bmp  | 
1239  |  |  * code points, then m supplementary code points.  Either n or m  | 
1240  |  |  * or both may be zero.  n+2*m is always <= 0x7FFF.  | 
1241  |  |  *  | 
1242  |  |  * If there are no supplementary characters (if m==0) then the  | 
1243  |  |  * header is one 16-bit integer, 'length', with value n.  | 
1244  |  |  *  | 
1245  |  |  * If there are supplementary characters (if m!=0) then the header  | 
1246  |  |  * is two 16-bit integers.  The first, 'length', has value  | 
1247  |  |  * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.  | 
1248  |  |  *  | 
1249  |  |  * After the header the code points are stored in ascending order.  | 
1250  |  |  * Supplementary code points are stored as most significant 16  | 
1251  |  |  * bits followed by least significant 16 bits.  | 
1252  |  |  *  | 
1253  |  |  * @param set the set  | 
1254  |  |  * @param dest pointer to buffer of destCapacity 16-bit integers.  | 
1255  |  |  * May be NULL only if destCapacity is zero.  | 
1256  |  |  * @param destCapacity size of dest, or zero.  Must not be negative.  | 
1257  |  |  * @param pErrorCode pointer to the error code.  Will be set to  | 
1258  |  |  * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to  | 
1259  |  |  * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.  | 
1260  |  |  * @return the total length of the serialized format, including  | 
1261  |  |  * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other  | 
1262  |  |  * than U_BUFFER_OVERFLOW_ERROR.  | 
1263  |  |  * @stable ICU 2.4  | 
1264  |  |  */  | 
1265  |  | U_CAPI int32_t U_EXPORT2  | 
1266  |  | uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);  | 
1267  |  |  | 
1268  |  | /**  | 
1269  |  |  * Given a serialized array, fill in the given serialized set object.  | 
1270  |  |  * @param fillSet pointer to result  | 
1271  |  |  * @param src pointer to start of array  | 
1272  |  |  * @param srcLength length of array  | 
1273  |  |  * @return true if the given array is valid, otherwise false  | 
1274  |  |  * @stable ICU 2.4  | 
1275  |  |  */  | 
1276  |  | U_CAPI UBool U_EXPORT2  | 
1277  |  | uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);  | 
1278  |  |  | 
1279  |  | /**  | 
1280  |  |  * Set the USerializedSet to contain the given character (and nothing  | 
1281  |  |  * else).  | 
1282  |  |  * @param fillSet pointer to result  | 
1283  |  |  * @param c The codepoint to set  | 
1284  |  |  * @stable ICU 2.4  | 
1285  |  |  */  | 
1286  |  | U_CAPI void U_EXPORT2  | 
1287  |  | uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);  | 
1288  |  |  | 
1289  |  | /**  | 
1290  |  |  * Returns true if the given USerializedSet contains the given  | 
1291  |  |  * character.  | 
1292  |  |  * @param set the serialized set  | 
1293  |  |  * @param c The codepoint to check for within the set  | 
1294  |  |  * @return true if set contains c  | 
1295  |  |  * @stable ICU 2.4  | 
1296  |  |  */  | 
1297  |  | U_CAPI UBool U_EXPORT2  | 
1298  |  | uset_serializedContains(const USerializedSet* set, UChar32 c);  | 
1299  |  |  | 
1300  |  | /**  | 
1301  |  |  * Returns the number of disjoint ranges of characters contained in  | 
1302  |  |  * the given serialized set.  Ignores any strings contained in the  | 
1303  |  |  * set.  | 
1304  |  |  * @param set the serialized set  | 
1305  |  |  * @return a non-negative integer counting the character ranges  | 
1306  |  |  * contained in set  | 
1307  |  |  * @stable ICU 2.4  | 
1308  |  |  */  | 
1309  |  | U_CAPI int32_t U_EXPORT2  | 
1310  |  | uset_getSerializedRangeCount(const USerializedSet* set);  | 
1311  |  |  | 
1312  |  | /**  | 
1313  |  |  * Returns a range of characters contained in the given serialized  | 
1314  |  |  * set.  | 
1315  |  |  * @param set the serialized set  | 
1316  |  |  * @param rangeIndex a non-negative integer in the range 0..  | 
1317  |  |  * uset_getSerializedRangeCount(set)-1  | 
1318  |  |  * @param pStart pointer to variable to receive first character  | 
1319  |  |  * in range, inclusive  | 
1320  |  |  * @param pEnd pointer to variable to receive last character in range,  | 
1321  |  |  * inclusive  | 
1322  |  |  * @return true if rangeIndex is valid, otherwise false  | 
1323  |  |  * @stable ICU 2.4  | 
1324  |  |  */  | 
1325  |  | U_CAPI UBool U_EXPORT2  | 
1326  |  | uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,  | 
1327  |  |                         UChar32* pStart, UChar32* pEnd);  | 
1328  |  |  | 
1329  |  | #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API  | 
1330  |  | #ifndef U_HIDE_DRAFT_API  | 
1331  |  |  | 
1332  |  | namespace U_HEADER_ONLY_NAMESPACE { | 
1333  |  |  | 
1334  |  | // Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,  | 
1335  |  | // not intended to be used via export from the ICU DLL.  | 
1336  |  |  | 
1337  |  | /**  | 
1338  |  |  * Iterator returned by USetCodePoints.  | 
1339  |  |  * @draft ICU 76  | 
1340  |  |  */  | 
1341  |  | class USetCodePointIterator { | 
1342  |  | public:  | 
1343  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1344  |  |     USetCodePointIterator(const USetCodePointIterator &other) = default;  | 
1345  |  |  | 
1346  |  |     /** Returns true if both iterators hold the same USet pointer and the same current code point. @draft ICU 76 */  | 
1347  | 0  |     bool operator==(const USetCodePointIterator &other) const { | 
1348  | 0  |         // No need to compare rangeCount & end given private constructor  | 
1349  | 0  |         // and assuming we don't compare iterators across the set being modified.  | 
1350  | 0  |         // And comparing rangeIndex is redundant with comparing c.  | 
1351  | 0  |         // We might even skip comparing uset.  | 
1352  | 0  |         // Unless we want operator==() to be "correct" for more than iteration.  | 
1353  | 0  |         return uset == other.uset && c == other.c;  | 
1354  | 0  |     }  | 
1355  |  |  | 
1356  |  |     /** Returns the negation of operator==(). @draft ICU 76 */  | 
1357  | 0  |     bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); } | 
1358  |  |  | 
1359  |  |     /** Returns the current code point, which is U_SENTINEL once iteration has ended. @draft ICU 76 */  | 
1360  | 0  |     UChar32 operator*() const { return c; } | 
1361  |  |  | 
1362  |  |     /**  | 
1363  |  |      * Pre-increment: steps to the next code point of the current range; at the end of a range fetches the next range with uset_getItem(); sets the current code point to U_SENTINEL when no range is left or the fetch fails.  | 
1364  |  |      * @draft ICU 76  | 
1365  |  |      */  | 
1366  | 0  |     USetCodePointIterator &operator++() { | 
1367  | 0  |         if (c < end) { | 
1368  | 0  |             ++c;  | 
1369  | 0  |         } else if (rangeIndex < rangeCount) { | 
1370  | 0  |             UErrorCode errorCode = U_ZERO_ERROR;  | 
1371  | 0  |             int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);  | 
1372  | 0  |             if (U_SUCCESS(errorCode) && result == 0) { | 
1373  | 0  |                 ++rangeIndex;  | 
1374  | 0  |             } else { | 
1375  | 0  |                 c = end = U_SENTINEL;  | 
1376  | 0  |             }  | 
1377  | 0  |         } else { | 
1378  | 0  |             c = end = U_SENTINEL;  | 
1379  | 0  |         }  | 
1380  | 0  |         return *this;  | 
1381  | 0  |     }  | 
1382  |  |  | 
1383  |  |     /**  | 
1384  |  |      * Post-increment: advances this iterator via the pre-increment operator and returns a copy of its previous state.  | 
1385  |  |      * @draft ICU 76  | 
1386  |  |      */  | 
1387  | 0  |     USetCodePointIterator operator++(int) { | 
1388  | 0  |         USetCodePointIterator result(*this);  | 
1389  | 0  |         operator++();  | 
1390  | 0  |         return result;  | 
1391  | 0  |     }  | 
1392  |  |  | 
1393  |  | private:  | 
1394  |  |     friend class USetCodePoints;  | 
1395  |  |  | 
1396  |  |     USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)  | 
1397  |  |             : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),  | 
1398  | 0  |                 c(U_SENTINEL), end(U_SENTINEL) { | 
1399  | 0  |         // Fetch the first range. When nRangeIndex == nRangeCount nothing is fetched and c stays U_SENTINEL (the end state).  | 
1400  | 0  |         operator++();  | 
1401  | 0  |     }  | 
1402  |  |  | 
1403  |  |     const USet *uset;  | 
1404  |  |     int32_t rangeIndex;  | 
1405  |  |     int32_t rangeCount;  | 
1406  |  |     UChar32 c, end;  | 
1407  |  | };  | 
1408  |  |  | 
1409  |  | /**  | 
1410  |  |  * C++ "range" for iterating over the code points of a USet.  | 
1411  |  |  *  | 
1412  |  |  * \code  | 
1413  |  |  * using U_HEADER_NESTED_NAMESPACE::USetCodePoints;  | 
1414  |  |  * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));  | 
1415  |  |  * for (UChar32 c : USetCodePoints(uset.getAlias())) { | 
1416  |  |  *     printf("uset.codePoint U+%04lx\n", (long)c); | 
1417  |  |  * }  | 
1418  |  |  * \endcode  | 
1419  |  |  *  | 
1420  |  |  * C++ UnicodeSet has member functions for iteration, including codePoints().  | 
1421  |  |  *  | 
1422  |  |  * @draft ICU 76  | 
1423  |  |  * @see USetRanges  | 
1424  |  |  * @see USetStrings  | 
1425  |  |  * @see USetElements  | 
1426  |  |  */  | 
1427  |  | class USetCodePoints { | 
1428  |  | public:  | 
1429  |  |     /**  | 
1430  |  |      * Constructs a C++ "range" object over the code points of the USet; stores pUset and caches uset_getRangeCount(pUset).  | 
1431  |  |      * @draft ICU 76  | 
1432  |  |      */  | 
1433  | 0  |     USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {} | 
1434  |  |  | 
1435  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1436  |  |     USetCodePoints(const USetCodePoints &other) = default;  | 
1437  |  |  | 
1438  |  |     /** Returns an iterator that starts at range index 0. @draft ICU 76 */  | 
1439  | 0  |     USetCodePointIterator begin() const { | 
1440  | 0  |         return USetCodePointIterator(uset, 0, rangeCount);  | 
1441  | 0  |     }  | 
1442  |  |  | 
1443  |  |     /** Returns the end iterator, constructed at range index rangeCount so that its code point is U_SENTINEL. @draft ICU 76 */  | 
1444  | 0  |     USetCodePointIterator end() const { | 
1445  | 0  |         return USetCodePointIterator(uset, rangeCount, rangeCount);  | 
1446  | 0  |     }  | 
1447  |  |  | 
1448  |  | private:  | 
1449  |  |     const USet *uset;  | 
1450  |  |     int32_t rangeCount;  | 
1451  |  | };  | 
1452  |  |  | 
1453  |  | /**  | 
1454  |  |  * A contiguous range of code points in a USet/UnicodeSet.  | 
1455  |  |  * Returned by USetRangeIterator which is returned by USetRanges.  | 
1456  |  |  * Both the rangeStart and rangeEnd are in the range.  | 
1457  |  |  * (end() returns an iterator corresponding to rangeEnd+1.)  | 
1458  |  |  * @draft ICU 76  | 
1459  |  |  */  | 
1460  |  | struct CodePointRange { | 
1461  |  |     /** Iterator over the code points of one range; wraps a single UChar32. @draft ICU 76 */  | 
1462  |  |     struct iterator { | 
1463  |  |         /** Constructs an iterator at code point aC; not explicit, so a UChar32 converts implicitly. @draft ICU 76 */  | 
1464  | 0  |         iterator(UChar32 aC) : c(aC) {} | 
1465  |  |  | 
1466  |  |         /** Returns true if both iterators hold the same code point. @draft ICU 76 */  | 
1467  | 0  |         bool operator==(const iterator &other) const { return c == other.c; } | 
1468  |  |         /** Returns the negation of operator==(). @draft ICU 76 */  | 
1469  | 0  |         bool operator!=(const iterator &other) const { return !operator==(other); } | 
1470  |  |  | 
1471  |  |         /** Returns the current code point. @draft ICU 76 */  | 
1472  | 0  |         UChar32 operator*() const { return c; } | 
1473  |  |  | 
1474  |  |         /**  | 
1475  |  |          * Pre-increment: adds one to the current code point and returns this iterator.  | 
1476  |  |          * @draft ICU 76  | 
1477  |  |          */  | 
1478  | 0  |         iterator &operator++() { | 
1479  | 0  |             ++c;  | 
1480  | 0  |             return *this;  | 
1481  | 0  |         }  | 
1482  |  |  | 
1483  |  |         /**  | 
1484  |  |          * Post-increment: adds one to the current code point and returns an iterator built (implicitly) from the previous value.  | 
1485  |  |          * @draft ICU 76  | 
1486  |  |          */  | 
1487  | 0  |         iterator operator++(int) { | 
1488  | 0  |             return c++;  | 
1489  | 0  |         }  | 
1490  |  |  | 
1491  |  |         /**  | 
1492  |  |          * The current code point in the range.  | 
1493  |  |          * @draft ICU 76  | 
1494  |  |          */  | 
1495  |  |         UChar32 c;  | 
1496  |  |     };  | 
1497  |  |  | 
1498  |  |     /** Constructs the range from start to end, both stored as given (end is inclusive). @draft ICU 76 */  | 
1499  | 0  |     CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {} | 
1500  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1501  |  |     CodePointRange(const CodePointRange &other) = default;  | 
1502  |  |     /** Returns the number of code points in the range, computed as (rangeEnd + 1) - rangeStart. @draft ICU 76 */  | 
1503  | 0  |     size_t size() const { return (rangeEnd + 1) - rangeStart; } | 
1504  |  |     /** Returns an iterator at rangeStart. @draft ICU 76 */  | 
1505  | 0  |     iterator begin() const { return rangeStart; } | 
1506  |  |     /** Returns an iterator at rangeEnd + 1, one past the inclusive end. @draft ICU 76 */  | 
1507  | 0  |     iterator end() const { return rangeEnd + 1; } | 
1508  |  |  | 
1509  |  |     /**  | 
1510  |  |      * Start of a USet/UnicodeSet range of code points.  | 
1511  |  |      * @draft ICU 76  | 
1512  |  |      */  | 
1513  |  |     UChar32 rangeStart;  | 
1514  |  |     /**  | 
1515  |  |      * Inclusive end of a USet/UnicodeSet range of code points.  | 
1516  |  |      * @draft ICU 76  | 
1517  |  |      */  | 
1518  |  |     UChar32 rangeEnd;  | 
1519  |  | };  | 
1520  |  |  | 
1521  |  | /**  | 
1522  |  |  * Iterator returned by USetRanges.  | 
1523  |  |  * @draft ICU 76  | 
1524  |  |  */  | 
1525  |  | class USetRangeIterator { | 
1526  |  | public:  | 
1527  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1528  |  |     USetRangeIterator(const USetRangeIterator &other) = default;  | 
1529  |  |  | 
1530  |  |     /** Returns true if both iterators hold the same USet pointer and the same range index. @draft ICU 76 */  | 
1531  | 0  |     bool operator==(const USetRangeIterator &other) const { | 
1532  | 0  |         // No need to compare rangeCount given private constructor  | 
1533  | 0  |         // and assuming we don't compare iterators across the set being modified.  | 
1534  | 0  |         // We might even skip comparing uset.  | 
1535  | 0  |         // Unless we want operator==() to be "correct" for more than iteration.  | 
1536  | 0  |         return uset == other.uset && rangeIndex == other.rangeIndex;  | 
1537  | 0  |     }  | 
1538  |  |  | 
1539  |  |     /** Returns the negation of operator==(). @draft ICU 76 */  | 
1540  | 0  |     bool operator!=(const USetRangeIterator &other) const { return !operator==(other); } | 
1541  |  |  | 
1542  |  |     /** Returns the range at rangeIndex as fetched by uset_getItem(); returns CodePointRange(U_SENTINEL, U_SENTINEL) if rangeIndex is not less than rangeCount or the fetch does not succeed. @draft ICU 76 */  | 
1543  | 0  |     CodePointRange operator*() const { | 
1544  | 0  |         if (rangeIndex < rangeCount) { | 
1545  | 0  |             UChar32 start, end;  | 
1546  | 0  |             UErrorCode errorCode = U_ZERO_ERROR;  | 
1547  | 0  |             int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);  | 
1548  | 0  |             if (U_SUCCESS(errorCode) && result == 0) { | 
1549  | 0  |                 return CodePointRange(start, end);  | 
1550  | 0  |             }  | 
1551  | 0  |         }  | 
1552  | 0  |         return CodePointRange(U_SENTINEL, U_SENTINEL);  | 
1553  | 0  |     }  | 
1554  |  |  | 
1555  |  |     /**  | 
1556  |  |      * Pre-increment: moves to the next range index (no bounds check) and returns this iterator.  | 
1557  |  |      * @draft ICU 76  | 
1558  |  |      */  | 
1559  | 0  |     USetRangeIterator &operator++() { | 
1560  | 0  |         ++rangeIndex;  | 
1561  | 0  |         return *this;  | 
1562  | 0  |     }  | 
1563  |  |  | 
1564  |  |     /**  | 
1565  |  |      * Post-increment: moves to the next range index and returns a copy of the previous state.  | 
1566  |  |      * @draft ICU 76  | 
1567  |  |      */  | 
1568  | 0  |     USetRangeIterator operator++(int) { | 
1569  | 0  |         USetRangeIterator result(*this);  | 
1570  | 0  |         ++rangeIndex;  | 
1571  | 0  |         return result;  | 
1572  | 0  |     }  | 
1573  |  |  | 
1574  |  | private:  | 
1575  |  |     friend class USetRanges;  | 
1576  |  |  | 
1577  |  |     USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)  | 
1578  | 0  |             : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {} | 
1579  |  |  | 
1580  |  |     const USet *uset;  | 
1581  |  |     int32_t rangeIndex;  | 
1582  |  |     int32_t rangeCount;  | 
1583  |  | };  | 
1584  |  |  | 
1585  |  | /**  | 
1586  |  |  * C++ "range" for iterating over the code point ranges of a USet.  | 
1587  |  |  *  | 
1588  |  |  * \code  | 
1589  |  |  * using U_HEADER_NESTED_NAMESPACE::USetRanges;  | 
1590  |  |  * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));  | 
1591  |  |  * for (auto [start, end] : USetRanges(uset.getAlias())) { | 
1592  |  |  *     printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end); | 
1593  |  |  * }  | 
1594  |  |  * for (auto range : USetRanges(uset.getAlias())) { | 
1595  |  |  *     for (UChar32 c : range) { | 
1596  |  |  *         printf("uset.range.c U+%04lx\n", (long)c); | 
1597  |  |  *     }  | 
1598  |  |  * }  | 
1599  |  |  * \endcode  | 
1600  |  |  *  | 
1601  |  |  * C++ UnicodeSet has member functions for iteration, including ranges().  | 
1602  |  |  *  | 
1603  |  |  * @draft ICU 76  | 
1604  |  |  * @see USetCodePoints  | 
1605  |  |  * @see USetStrings  | 
1606  |  |  * @see USetElements  | 
1607  |  |  */  | 
1608  |  | class USetRanges { | 
1609  |  | public:  | 
1610  |  |     /**  | 
1611  |  |      * Constructs a C++ "range" object over the code point ranges of the USet; stores pUset and caches uset_getRangeCount(pUset).  | 
1612  |  |      * @draft ICU 76  | 
1613  |  |      */  | 
1614  | 0  |     USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {} | 
1615  |  |  | 
1616  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1617  |  |     USetRanges(const USetRanges &other) = default;  | 
1618  |  |  | 
1619  |  |     /** Returns an iterator at range index 0. @draft ICU 76 */  | 
1620  | 0  |     USetRangeIterator begin() const { | 
1621  | 0  |         return USetRangeIterator(uset, 0, rangeCount);  | 
1622  | 0  |     }  | 
1623  |  |  | 
1624  |  |     /** Returns the end iterator, at range index rangeCount. @draft ICU 76 */  | 
1625  | 0  |     USetRangeIterator end() const { | 
1626  | 0  |         return USetRangeIterator(uset, rangeCount, rangeCount);  | 
1627  | 0  |     }  | 
1628  |  |  | 
1629  |  | private:  | 
1630  |  |     const USet *uset;  | 
1631  |  |     int32_t rangeCount;  | 
1632  |  | };  | 
1633  |  |  | 
1634  |  | /**  | 
1635  |  |  * Iterator returned by USetStrings.  | 
1636  |  |  * @draft ICU 76  | 
1637  |  |  */  | 
1638  |  | class USetStringIterator { | 
1639  |  | public:  | 
1640  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1641  |  |     USetStringIterator(const USetStringIterator &other) = default;  | 
1642  |  |  | 
1643  |  |     /** Returns true if both iterators hold the same USet pointer and the same string index. @draft ICU 76 */  | 
1644  | 0  |     bool operator==(const USetStringIterator &other) const { | 
1645  | 0  |         // No need to compare count given private constructor  | 
1646  | 0  |         // and assuming we don't compare iterators across the set being modified.  | 
1647  | 0  |         // We might even skip comparing uset.  | 
1648  | 0  |         // Unless we want operator==() to be "correct" for more than iteration.  | 
1649  | 0  |         return uset == other.uset && index == other.index;  | 
1650  | 0  |     }  | 
1651  |  |  | 
1652  |  |     /** Returns the negation of operator==(). @draft ICU 76 */  | 
1653  | 0  |     bool operator!=(const USetStringIterator &other) const { return !operator==(other); } | 
1654  |  |  | 
1655  |  |     /** Returns a view (pointer and length from uset_getString(), no copy) of the string at the current index; returns an empty view if index is not less than count. @draft ICU 76 */  | 
1656  | 0  |     std::u16string_view operator*() const { | 
1657  | 0  |         if (index < count) { | 
1658  | 0  |             int32_t length;  | 
1659  | 0  |             const UChar *uchars = uset_getString(uset, index, &length);  | 
1660  | 0  |             // assert uchars != nullptr;  | 
1661  | 0  |             return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)}; | 
1662  | 0  |         }  | 
1663  | 0  |         return {}; | 
1664  | 0  |     }  | 
1665  |  |  | 
1666  |  |     /**  | 
1667  |  |      * Pre-increment: moves to the next string index (no bounds check) and returns this iterator.  | 
1668  |  |      * @draft ICU 76  | 
1669  |  |      */  | 
1670  | 0  |     USetStringIterator &operator++() { | 
1671  | 0  |         ++index;  | 
1672  | 0  |         return *this;  | 
1673  | 0  |     }  | 
1674  |  |  | 
1675  |  |     /**  | 
1676  |  |      * Post-increment: moves to the next string index and returns a copy of the previous state.  | 
1677  |  |      * @draft ICU 76  | 
1678  |  |      */  | 
1679  | 0  |     USetStringIterator operator++(int) { | 
1680  | 0  |         USetStringIterator result(*this);  | 
1681  | 0  |         ++index;  | 
1682  | 0  |         return result;  | 
1683  | 0  |     }  | 
1684  |  |  | 
1685  |  | private:  | 
1686  |  |     friend class USetStrings;  | 
1687  |  |  | 
1688  |  |     USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)  | 
1689  | 0  |             : uset(pUset), index(nIndex), count(nCount) {} | 
1690  |  |  | 
1691  |  |     const USet *uset;  | 
1692  |  |     int32_t index;  | 
1693  |  |     int32_t count;  | 
1694  |  | };  | 
1695  |  |  | 
1696  |  | /**  | 
1697  |  |  * C++ "range" for iterating over the empty and multi-character strings of a USet.  | 
1698  |  |  *  | 
1699  |  |  * \code  | 
1700  |  |  * using U_HEADER_NESTED_NAMESPACE::USetStrings;  | 
1701  |  |  * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); | 
1702  |  |  * for (auto s : USetStrings(uset.getAlias())) { | 
1703  |  |  *     int32_t len32 = s.length();  | 
1704  |  |  *     char utf8[200];  | 
1705  |  |  *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr, | 
1706  |  |  *                        s.data(), len32, 0xFFFD, nullptr, &errorCode);  | 
1707  |  |  *     printf("uset.string length %ld \"%s\"\n", long{len32}, utf8); | 
1708  |  |  * }  | 
1709  |  |  * \endcode  | 
1710  |  |  *  | 
1711  |  |  * C++ UnicodeSet has member functions for iteration, including strings().  | 
1712  |  |  *  | 
1713  |  |  * @draft ICU 76  | 
1714  |  |  * @see USetCodePoints  | 
1715  |  |  * @see USetRanges  | 
1716  |  |  * @see USetElements  | 
1717  |  |  */  | 
1718  |  | class USetStrings { | 
1719  |  | public:  | 
1720  |  |     /**  | 
1721  |  |      * Constructs a C++ "range" object over the strings of the USet; stores pUset and caches uset_getStringCount(pUset).  | 
1722  |  |      * @draft ICU 76  | 
1723  |  |      */  | 
1724  | 0  |     USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {} | 
1725  |  |  | 
1726  |  |     /** Copy constructor (member-wise copy). @draft ICU 76 */  | 
1727  |  |     USetStrings(const USetStrings &other) = default;  | 
1728  |  |  | 
1729  |  |     /** Returns an iterator at string index 0. @draft ICU 76 */  | 
1730  | 0  |     USetStringIterator begin() const { | 
1731  | 0  |         return USetStringIterator(uset, 0, count);  | 
1732  | 0  |     }  | 
1733  |  |  | 
1734  |  |     /** Returns the end iterator, at string index count. @draft ICU 76 */  | 
1735  | 0  |     USetStringIterator end() const { | 
1736  | 0  |         return USetStringIterator(uset, count, count);  | 
1737  | 0  |     }  | 
1738  |  |  | 
1739  |  | private:  | 
1740  |  |     const USet *uset;  | 
1741  |  |     int32_t count;  | 
1742  |  | };  | 
1743  |  | #endif  // U_HIDE_DRAFT_API  | 
1744  |  |  | 
1745  |  | #ifndef U_HIDE_DRAFT_API  | 
1746  |  | /**  | 
1747  |  |  * Iterator returned by USetElements.  | 
1748  |  |  * @draft ICU 77  | 
1749  |  |  */  | 
1750  |  | class USetElementIterator { | 
1751  |  | public:  | 
1752  |  |     /** Copy constructor (member-wise copy). @draft ICU 77 */  | 
1753  |  |     USetElementIterator(const USetElementIterator &other) = default;  | 
1754  |  |  | 
1755  |  |     /** Returns true if both iterators hold the same USet pointer, the same current code point and the same index. @draft ICU 77 */  | 
1756  | 0  |     bool operator==(const USetElementIterator &other) const { | 
1757  | 0  |         // No need to compare rangeCount & end given private constructor  | 
1758  | 0  |         // and assuming we don't compare iterators across the set being modified.  | 
1759  | 0  |         // We might even skip comparing uset.  | 
1760  | 0  |         // Unless we want operator==() to be "correct" for more than iteration.  | 
1761  | 0  |         return uset == other.uset && c == other.c && index == other.index;  | 
1762  | 0  |     }  | 
1763  |  |  | 
1764  |  |     /** Returns the negation of operator==(). @draft ICU 77 */  | 
1765  | 0  |     bool operator!=(const USetElementIterator &other) const { return !operator==(other); } | 
1766  |  |  | 
1767  |  |     /** Returns the current element as a newly built std::u16string: one or two UTF-16 code units for the current code point while c >= 0, otherwise a copy of the string at index - rangeCount while index < totalCount, otherwise an empty string. @draft ICU 77 */  | 
1768  | 0  |     std::u16string operator*() const { | 
1769  | 0  |         if (c >= 0) { | 
1770  | 0  |             return c <= 0xffff ?  | 
1771  | 0  |                 std::u16string({static_cast<char16_t>(c)}) : | 
1772  | 0  |                 std::u16string({U16_LEAD(c), U16_TRAIL(c)}); | 
1773  | 0  |         } else if (index < totalCount) { | 
1774  | 0  |             int32_t length;  | 
1775  | 0  |             const UChar *uchars = uset_getString(uset, index - rangeCount, &length);  | 
1776  | 0  |             // assert uchars != nullptr;  | 
1777  | 0  |             return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)}; | 
1778  | 0  |         } else { | 
1779  | 0  |             return {}; | 
1780  | 0  |         }  | 
1781  | 0  |     }  | 
1782  |  |  | 
1783  |  |     /**  | 
1784  |  |      * Pre-increment: steps to the next code point of the current range; else fetches the next range with uset_getItem(); else switches from the last range to the first string; else moves to the next string.  | 
1785  |  |      * @draft ICU 77  | 
1786  |  |      */  | 
1787  | 0  |     USetElementIterator &operator++() { | 
1788  | 0  |         if (c < end) { | 
1789  | 0  |             ++c;  | 
1790  | 0  |         } else if (index < rangeCount) { | 
1791  | 0  |             UErrorCode errorCode = U_ZERO_ERROR;  | 
1792  | 0  |             int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);  | 
1793  | 0  |             if (U_SUCCESS(errorCode) && result == 0) { | 
1794  | 0  |                 ++index;  | 
1795  | 0  |             } else { | 
1796  | 0  |                 c = end = U_SENTINEL;  | 
1797  | 0  |             }  | 
1798  | 0  |         } else if (c >= 0) { | 
1799  | 0  |             // assert index == rangeCount;  | 
1800  | 0  |             // Switch from the last range to the first string.  | 
1801  | 0  |             c = end = U_SENTINEL;  | 
1802  | 0  |         } else { | 
1803  | 0  |             ++index;  | 
1804  | 0  |         }  | 
1805  | 0  |         return *this;  | 
1806  | 0  |     }  | 
1807  |  |  | 
1808  |  |     /**  | 
1809  |  |      * Post-increment: advances this iterator via the pre-increment operator and returns a copy of its previous state.  | 
1810  |  |      * @draft ICU 77  | 
1811  |  |      */  | 
1812  | 0  |     USetElementIterator operator++(int) { | 
1813  | 0  |         USetElementIterator result(*this);  | 
1814  | 0  |         operator++();  | 
1815  | 0  |         return result;  | 
1816  | 0  |     }  | 
1817  |  |  | 
1818  |  | private:  | 
1819  |  |     friend class USetElements;  | 
1820  |  |  | 
1821  |  |     USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)  | 
1822  |  |             : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),  | 
1823  | 0  |                 c(U_SENTINEL), end(U_SENTINEL) { | 
1824  | 0  |         if (index < rangeCount) { | 
1825  | 0  |             // Fetch the first range; on success this also advances index past that range.  | 
1826  | 0  |             operator++();  | 
1827  | 0  |         }  | 
1828  | 0  |         // Otherwise don't move beyond the (index - rangeCount)-th string.  | 
1829  | 0  |     }  | 
1830  |  |  | 
1831  |  |     const USet *uset;  | 
1832  |  |     int32_t index;  | 
1833  |  |     /** Number of UnicodeSet/USet code point ranges. */  | 
1834  |  |     int32_t rangeCount;  | 
1835  |  |     /**  | 
1836  |  |      * Number of code point ranges plus number of strings.  | 
1837  |  |      * index starts from 0, counts ranges while less than rangeCount,  | 
1838  |  |      * then counts strings while at least rangeCount and less than totalCount.  | 
1839  |  |      *  | 
1840  |  |      * Note that totalCount is the same as uset_getItemCount(), but usually  | 
1841  |  |      * smaller than the number of elements returned by this iterator  | 
1842  |  |      * because we return each code point of each range.  | 
1843  |  |      */  | 
1844  |  |     int32_t totalCount;  | 
1845  |  |     UChar32 c, end;  | 
1846  |  | };  | 
1847  |  |  | 
1848  |  | /**  | 
1849  |  |  * A C++ "range" for iterating over all of the elements of a USet.  | 
1850  |  |  * Convenient all-in one iteration, but creates a std::u16string for each  | 
1851  |  |  * code point or string.  | 
1852  |  |  *  | 
1853  |  |  * Code points are returned first, then empty and multi-character strings.  | 
1854  |  |  *  | 
1855  |  |  * \code  | 
1856  |  |  * using U_HEADER_NESTED_NAMESPACE::USetElements;  | 
1857  |  |  * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); | 
1858  |  |  * for (auto el : USetElements(uset.getAlias())) { | 
1859  |  |  *     int32_t len32 = el.length();  | 
1860  |  |  *     char utf8[200];  | 
1861  |  |  *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr, | 
1862  |  |  *                        el.data(), len32, 0xFFFD, nullptr, &errorCode);  | 
1863  |  |  *     printf("uset.element length %ld \"%s\"\n", long{len32}, utf8); | 
1864  |  |  * }  | 
1865  |  |  * \endcode  | 
1866  |  |  *  | 
1867  |  |  * C++ UnicodeSet has member functions for iteration, including begin() and end().  | 
1868  |  |  *  | 
1869  |  |  * @return an all-elements iterator.  | 
1870  |  |  * @draft ICU 77  | 
1871  |  |  * @see USetCodePoints  | 
1872  |  |  * @see USetRanges  | 
1873  |  |  * @see USetStrings  | 
1874  |  |  */  | 
1875  |  | class USetElements { | 
1876  |  | public:  | 
1877  |  |     /**  | 
1878  |  |      * Constructs a C++ "range" object over all of the elements of the USet; stores pUset and caches uset_getRangeCount(pUset) and uset_getStringCount(pUset).  | 
1879  |  |      * @draft ICU 77  | 
1880  |  |      */  | 
1881  |  |     USetElements(const USet *pUset)  | 
1882  |  |         : uset(pUset), rangeCount(uset_getRangeCount(pUset)),  | 
1883  | 0  |             stringCount(uset_getStringCount(pUset)) {} | 
1884  |  |  | 
1885  |  |     /** Copy constructor (member-wise copy). @draft ICU 77 */  | 
1886  |  |     USetElements(const USetElements &other) = default;  | 
1887  |  |  | 
1888  |  |     /** Returns an iterator that starts at index 0, with rangeCount + stringCount as its total count. @draft ICU 77 */  | 
1889  | 0  |     USetElementIterator begin() const { | 
1890  | 0  |         return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);  | 
1891  | 0  |     }  | 
1892  |  |  | 
1893  |  |     /** Returns the end iterator, whose index equals rangeCount + stringCount. @draft ICU 77 */  | 
1894  | 0  |     USetElementIterator end() const { | 
1895  | 0  |         return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);  | 
1896  | 0  |     }  | 
1897  |  |  | 
1898  |  | private:  | 
1899  |  |     const USet *uset;  | 
1900  |  |     int32_t rangeCount, stringCount;  | 
1901  |  | };  | 
1902  |  |  | 
1903  |  | }  // namespace U_HEADER_ONLY_NAMESPACE  | 
1904  |  |  | 
1905  |  | #endif  // U_HIDE_DRAFT_API  | 
1906  |  | #endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API  | 
1907  |  |  | 
1908  |  | #endif  // __USET_H__  |