Coverage Report

Created: 2025-10-31 09:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/node/deps/icu-small/source/common/unicode/uset.h
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2002-2014, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uset.h
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002mar07
16
*   created by: Markus W. Scherer
17
*
18
*   C version of UnicodeSet.
19
*/
20
21
22
/**
23
 * \file
24
 * \brief C API: Unicode Set
25
 *
26
 * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
27
 */
28
29
#ifndef __USET_H__
30
#define __USET_H__
31
32
#include "unicode/utypes.h"
33
#include "unicode/uchar.h"
34
35
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
36
#include <string>
37
#include <string_view>
38
#include "unicode/char16ptr.h"
39
#include "unicode/localpointer.h"
40
#include "unicode/utf16.h"
41
#endif
42
43
#ifndef USET_DEFINED
44
45
#ifndef U_IN_DOXYGEN
46
#define USET_DEFINED
47
#endif
48
/**
49
 * USet is the C API type corresponding to C++ class UnicodeSet.
50
 * Use the uset_* API to manipulate.  Create with
51
 * uset_open*, and destroy with uset_close.
52
 * @stable ICU 2.4
53
 */
54
typedef struct USet USet;
55
#endif
56
57
/**
58
 * Bitmask values to be passed to uset_openPatternOptions() or
59
 * uset_applyPattern() taking an option parameter.
60
 *
61
 * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
62
 * These case options are mutually exclusive.
63
 *
64
 * Undefined options bits are ignored, and reserved for future use.
65
 *
66
 * @stable ICU 2.4
67
 */
68
enum {
69
    /**
70
     * Ignore white space within patterns unless quoted or escaped.
71
     * @stable ICU 2.4
72
     */
73
    USET_IGNORE_SPACE = 1,
74
75
    /**
76
     * Enable case insensitive matching.  E.g., "[ab]" with this flag
77
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
78
     * match all except 'a', 'A', 'b', and 'B'. This performs a full
79
     * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
80
     *
81
     * The resulting set is a superset of the input for the code points but
82
     * not for the strings.
83
     * It performs a case mapping closure of the code points and adds
84
     * full case folding strings for the code points, and reduces strings of
85
     * the original set to their full case folding equivalents.
86
     *
87
     * This is designed for case-insensitive matches, for example
88
     * in regular expressions. The full code point case closure allows checking of
89
     * an input character directly against the closure set.
90
     * Strings are matched by comparing the case-folded form from the closure
91
     * set with an incremental case folding of the string in question.
92
     *
93
     * The closure set will also contain single code points if the original
94
     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
95
     * This is not necessary (that is, redundant) for the above matching method
96
     * but results in the same closure sets regardless of whether the original
97
     * set contained the code point or a string.
98
     *
99
     * @stable ICU 2.4
100
     */
101
    USET_CASE_INSENSITIVE = 2,
102
103
    /**
104
     * Adds all case mappings for each element in the set.
105
     * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
106
     * of each existing element in the set.
107
     *
108
     * Unlike the “case insensitive” options, this does not perform a closure.
109
     * For example, it does not add 'ſ' (U+017F long s) for 's',
110
     * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
111
     *
112
     * @stable ICU 3.2
113
     */
114
    USET_ADD_CASE_MAPPINGS = 4,
115
116
    /**
117
     * Enable case insensitive matching.
118
     * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
119
     * which map each code point to one code point,
120
     * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
121
     *
122
     * This is designed for case-insensitive matches, for example in certain
123
     * regular expression implementations where only Simple_Case_Folding mappings are used,
124
     * such as in ECMAScript (JavaScript) regular expressions.
125
     *
126
     * @stable ICU 73
127
     */
128
    USET_SIMPLE_CASE_INSENSITIVE = 6
129
};
130
131
/**
132
 * Argument values for whether span() and similar functions continue while
133
 * the current character is contained vs. not contained in the set.
134
 *
135
 * The functionality is straightforward for sets with only single code points,
136
 * without strings (which is the common case):
137
 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
138
 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
139
 * - span() and spanBack() partition any string the same way when
140
 *   alternating between span(USET_SPAN_NOT_CONTAINED) and
141
 *   span(either "contained" condition).
142
 * - Using a complemented (inverted) set and the opposite span conditions
143
 *   yields the same results.
144
 *
145
 * When a set contains multi-code point strings, then these statements may not
146
 * be true, depending on the strings in the set (for example, whether they
147
 * overlap with each other) and the string that is processed.
148
 * For a set with strings:
149
 * - The complement of the set contains the opposite set of code points,
150
 *   but the same set of strings.
151
 *   Therefore, complementing both the set and the span conditions
152
 *   may yield different results.
153
 * - When starting spans at different positions in a string
154
 *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
155
 *   because a set string may start before the later position.
156
 * - span(USET_SPAN_SIMPLE) may be shorter than
157
 *   span(USET_SPAN_CONTAINED) because it will not recursively try
158
 *   all possible paths.
159
 *   For example, with a set which contains the three strings "xy", "xya" and "ax",
160
 *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
161
 *   span("xyax", USET_SPAN_SIMPLE) will return 3.
162
 *   span(USET_SPAN_SIMPLE) will never be longer than
163
 *   span(USET_SPAN_CONTAINED).
164
 * - With either "contained" condition, span() and spanBack() may partition
165
 *   a string in different ways.
166
 *   For example, with a set which contains the two strings "ab" and "ba",
167
 *   and when processing the string "aba",
168
 *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
169
 *   while spanBack() will yield boundaries of { 0, 1, 3 }.
170
 *
171
 * Note: If it is important to get the same boundaries whether iterating forward
172
 * or backward through a string, then either only span() should be used and
173
 * the boundaries cached for backward operation, or an ICU BreakIterator
174
 * could be used.
175
 *
176
 * Note: Unpaired surrogates are treated like surrogate code points.
177
 * Similarly, set strings match only on code point boundaries,
178
 * never in the middle of a surrogate pair.
179
 * Illegal UTF-8 sequences are treated like U+FFFD.
180
 * When processing UTF-8 strings, malformed set strings
181
 * (strings with unpaired surrogates which cannot be converted to UTF-8)
182
 * are ignored.
183
 *
184
 * @stable ICU 3.8
185
 */
186
typedef enum USetSpanCondition {
187
    /**
188
     * Continues a span() while there is no set element at the current position.
189
     * Increments by one code point at a time.
190
     * Stops before the first set element (character or string).
191
     * (For code points only, this is like while contains(current)==false).
192
     *
193
     * When span() returns, the substring between where it started and the position
194
     * it returned consists only of characters that are not in the set,
195
     * and none of its strings overlap with the span.
196
     *
197
     * @stable ICU 3.8
198
     */
199
    USET_SPAN_NOT_CONTAINED = 0,
200
    /**
201
     * Spans the longest substring that is a concatenation of set elements (characters or strings).
202
     * (For characters only, this is like while contains(current)==true).
203
     *
204
     * When span() returns, the substring between where it started and the position
205
     * it returned consists only of set elements (characters or strings) that are in the set.
206
     *
207
     * If a set contains strings, then the span will be the longest substring for which there
208
     * exists at least one non-overlapping concatenation of set elements (characters or strings).
209
     * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
210
     * (Java/ICU/Perl regex stops at the first match of an OR.)
211
     *
212
     * @stable ICU 3.8
213
     */
214
    USET_SPAN_CONTAINED = 1,
215
    /**
216
     * Continues a span() while there is a set element at the current position.
217
     * Increments by the longest matching element at each position.
218
     * (For characters only, this is like while contains(current)==true).
219
     *
220
     * When span() returns, the substring between where it started and the position
221
     * it returned consists only of set elements (characters or strings) that are in the set.
222
     *
223
     * If a set only contains single characters, then this is the same
224
     * as USET_SPAN_CONTAINED.
225
     *
226
     * If a set contains strings, then the span will be the longest substring
227
     * with a match at each position with the longest single set element (character or string).
228
     *
229
     * Use this span condition together with other longest-match algorithms,
230
     * such as ICU converters (ucnv_getUnicodeSet()).
231
     *
232
     * @stable ICU 3.8
233
     */
234
    USET_SPAN_SIMPLE = 2,
235
#ifndef U_HIDE_DEPRECATED_API
236
    /**
237
     * One more than the last span condition.
238
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
239
     */
240
    USET_SPAN_CONDITION_COUNT
241
#endif  // U_HIDE_DEPRECATED_API
242
} USetSpanCondition;
243
244
enum {
245
    /**
246
     * Capacity of USerializedSet::staticArray.
247
     * Enough for any single-code point set.
248
     * Also provides padding for nice sizeof(USerializedSet).
249
     * @stable ICU 2.4
250
     */
251
    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
252
};
253
254
/**
255
 * A serialized form of a Unicode set.  Limited manipulations are
256
 * possible directly on a serialized set.  See below.
257
 * @stable ICU 2.4
258
 */
259
typedef struct USerializedSet {
260
    /**
261
     * The serialized Unicode Set.
262
     * @stable ICU 2.4
263
     */
264
    const uint16_t *array;
265
    /**
266
     * The length of the array that contains BMP characters.
267
     * @stable ICU 2.4
268
     */
269
    int32_t bmpLength;
270
    /**
271
     * The total length of the array.
272
     * @stable ICU 2.4
273
     */
274
    int32_t length;
275
    /**
276
     * A small buffer for the array to reduce memory allocations.
277
     * @stable ICU 2.4
278
     */
279
    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
280
} USerializedSet;
281
282
/*********************************************************************
283
 * USet API
284
 *********************************************************************/
285
286
/**
287
 * Create an empty USet object.
288
 * Equivalent to uset_open(1, 0).
289
 * @return a newly created USet.  The caller must call uset_close() on
290
 * it when done.
291
 * @stable ICU 4.2
292
 */
293
U_CAPI USet* U_EXPORT2
294
uset_openEmpty(void);
295
296
/**
297
 * Creates a USet object that contains the range of characters
298
 * start..end, inclusive.  If <code>start > end</code> 
299
 * then an empty set is created (same as using uset_openEmpty()).
300
 * @param start first character of the range, inclusive
301
 * @param end last character of the range, inclusive
302
 * @return a newly created USet.  The caller must call uset_close() on
303
 * it when done.
304
 * @stable ICU 2.4
305
 */
306
U_CAPI USet* U_EXPORT2
307
uset_open(UChar32 start, UChar32 end);
308
309
/**
310
 * Creates a set from the given pattern.  See the UnicodeSet class
311
 * description for the syntax of the pattern language.
312
 * @param pattern a string specifying what characters are in the set
313
 * @param patternLength the length of the pattern, or -1 if null
314
 * terminated
315
 * @param ec the error code
316
 * @stable ICU 2.4
317
 */
318
U_CAPI USet* U_EXPORT2
319
uset_openPattern(const UChar* pattern, int32_t patternLength,
320
                 UErrorCode* ec);
321
322
/**
323
 * Creates a set from the given pattern.  See the UnicodeSet class
324
 * description for the syntax of the pattern language.
325
 * @param pattern a string specifying what characters are in the set
326
 * @param patternLength the length of the pattern, or -1 if null
327
 * terminated
328
 * @param options bitmask for options to apply to the pattern.
329
 * Valid options are USET_IGNORE_SPACE and
330
 * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
331
 * These case options are mutually exclusive.
332
 * @param ec the error code
333
 * @stable ICU 2.4
334
 */
335
U_CAPI USet* U_EXPORT2
336
uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
337
                 uint32_t options,
338
                 UErrorCode* ec);
339
340
/**
341
 * Disposes of the storage used by a USet object.  This function should
342
 * be called exactly once for objects returned by uset_open().
343
 * @param set the object to dispose of
344
 * @stable ICU 2.4
345
 */
346
U_CAPI void U_EXPORT2
347
uset_close(USet* set);
348
349
#if U_SHOW_CPLUSPLUS_API
350
351
U_NAMESPACE_BEGIN
352
353
/**
354
 * \class LocalUSetPointer
355
 * "Smart pointer" class, closes a USet via uset_close().
356
 * For most methods see the LocalPointerBase base class.
357
 *
358
 * @see LocalPointerBase
359
 * @see LocalPointer
360
 * @stable ICU 4.4
361
 */
362
U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
363
364
U_NAMESPACE_END
365
366
#endif
367
368
/**
369
 * Returns a copy of this object.
370
 * If this set is frozen, then the clone will be frozen as well.
371
 * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
372
 * @param set the original set
373
 * @return the newly allocated copy of the set
374
 * @see uset_cloneAsThawed
375
 * @stable ICU 3.8
376
 */
377
U_CAPI USet * U_EXPORT2
378
uset_clone(const USet *set);
379
380
/**
381
 * Determines whether the set has been frozen (made immutable) or not.
382
 * See the ICU4J Freezable interface for details.
383
 * @param set the set
384
 * @return true/false for whether the set has been frozen
385
 * @see uset_freeze
386
 * @see uset_cloneAsThawed
387
 * @stable ICU 3.8
388
 */
389
U_CAPI UBool U_EXPORT2
390
uset_isFrozen(const USet *set);
391
392
/**
393
 * Freeze the set (make it immutable).
394
 * Once frozen, it cannot be unfrozen and is therefore thread-safe
395
 * until it is deleted.
396
 * See the ICU4J Freezable interface for details.
397
 * Freezing the set may also make some operations faster, for example
398
 * uset_contains() and uset_span().
399
 * A frozen set will not be modified. (It remains frozen.)
400
 * @param set the set
401
 * @note This function returns void; the set passed in is frozen in place.
402
 * @see uset_isFrozen
403
 * @see uset_cloneAsThawed
404
 * @stable ICU 3.8
405
 */
406
U_CAPI void U_EXPORT2
407
uset_freeze(USet *set);
408
409
/**
410
 * Clone the set and make the clone mutable.
411
 * See the ICU4J Freezable interface for details.
412
 * @param set the set
413
 * @return the mutable clone
414
 * @see uset_freeze
415
 * @see uset_isFrozen
416
 * @see uset_clone
417
 * @stable ICU 3.8
418
 */
419
U_CAPI USet * U_EXPORT2
420
uset_cloneAsThawed(const USet *set);
421
422
/**
423
 * Causes the USet object to represent the range <code>start - end</code>.
424
 * If <code>start > end</code> then this USet is set to an empty range.
425
 * A frozen set will not be modified.
426
 * @param set the object to set to the given range
427
 * @param start first character in the set, inclusive
428
 * @param end last character in the set, inclusive
429
 * @stable ICU 3.2
430
 */
431
U_CAPI void U_EXPORT2
432
uset_set(USet* set,
433
         UChar32 start, UChar32 end);
434
435
/**
436
 * Modifies the set to represent the set specified by the given
437
 * pattern. See the UnicodeSet class description for the syntax of 
438
 * the pattern language. See also the User Guide chapter about UnicodeSet.
439
 * <em>Empties the set passed before applying the pattern.</em>
440
 * A frozen set will not be modified.
441
 * @param set               The set to which the pattern is to be applied. 
442
 * @param pattern           A pointer to UChar string specifying what characters are in the set.
443
 *                          The character at pattern[0] must be a '['.
444
 * @param patternLength     The length of the UChar string. -1 if NUL terminated.
445
 * @param options           A bitmask for options to apply to the pattern.
446
 *                          Valid options are USET_IGNORE_SPACE and
447
 *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
448
 *                          USET_SIMPLE_CASE_INSENSITIVE.
449
 *                          These case options are mutually exclusive.
450
 * @param status            Returns an error if the pattern cannot be parsed.
451
 * @return                  Upon successful parse, the value is
452
 *                          the index of the character after the closing ']' 
453
 *                          of the parsed pattern.
454
 *                          If the status code indicates failure, then the return value 
455
 *                          is the index of the error in the source.
456
 *
457
 * @stable ICU 2.8
458
 */
459
U_CAPI int32_t U_EXPORT2 
460
uset_applyPattern(USet *set,
461
                  const UChar *pattern, int32_t patternLength,
462
                  uint32_t options,
463
                  UErrorCode *status);
464
465
/**
466
 * Modifies the set to contain those code points which have the given value
467
 * for the given binary or enumerated property, as returned by
468
 * u_getIntPropertyValue.  Prior contents of this set are lost.
469
 * A frozen set will not be modified.
470
 *
471
 * @param set the object to contain the code points defined by the property
472
 *
473
 * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
474
 * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
475
 * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
476
 *
477
 * @param value a value in the range u_getIntPropertyMinValue(prop)..
478
 * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
479
 * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
480
 * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
481
 * categories such as [:L:] to be represented.
482
 *
483
 * @param ec error code input/output parameter
484
 *
485
 * @stable ICU 3.2
486
 */
487
U_CAPI void U_EXPORT2
488
uset_applyIntPropertyValue(USet* set,
489
                           UProperty prop, int32_t value, UErrorCode* ec);
490
491
/**
492
 * Modifies the set to contain those code points which have the
493
 * given value for the given property.  Prior contents of this
494
 * set are lost.
495
 * A frozen set will not be modified.
496
 *
497
 * @param set the object to contain the code points defined by the given
498
 * property and value alias
499
 *
500
 * @param prop a string specifying a property alias, either short or long.
501
 * The name is matched loosely.  See PropertyAliases.txt for names and a
502
 * description of loose matching.  If the value string is empty, then this
503
 * string is interpreted as either a General_Category value alias, a Script
504
 * value alias, a binary property alias, or a special ID.  Special IDs are
505
 * matched loosely and correspond to the following sets:
506
 *
507
 * "ANY" = [\\u0000-\\U0010FFFF],
508
 * "ASCII" = [\\u0000-\\u007F],
509
 * "Assigned" = [:^Cn:].
510
 *
511
 * @param propLength the length of the prop, or -1 if NUL-terminated
512
 *
513
 * @param value a string specifying a value alias, either short or long.
514
 * The name is matched loosely.  See PropertyValueAliases.txt for names
515
 * and a description of loose matching.  In addition to aliases listed,
516
 * numeric values and canonical combining classes may be expressed
517
 * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
518
 * may also be empty.
519
 *
520
 * @param valueLength the length of the value, or -1 if NUL-terminated
521
 *
522
 * @param ec error code input/output parameter
523
 *
524
 * @stable ICU 3.2
525
 */
526
U_CAPI void U_EXPORT2
527
uset_applyPropertyAlias(USet* set,
528
                        const UChar *prop, int32_t propLength,
529
                        const UChar *value, int32_t valueLength,
530
                        UErrorCode* ec);
531
532
/**
533
 * Return true if the given position, in the given pattern, appears
534
 * to be the start of a UnicodeSet pattern.
535
 *
536
 * @param pattern a string specifying the pattern
537
 * @param patternLength the length of the pattern, or -1 if NUL-terminated
538
 * @param pos the given position
539
 * @stable ICU 3.2
540
 */
541
U_CAPI UBool U_EXPORT2
542
uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
543
                      int32_t pos);
544
545
/**
546
 * Returns a string representation of this set.  If the result of
547
 * calling this function is passed to a uset_openPattern(), it
548
 * will produce another set that is equal to this one.
549
 * @param set the set
550
 * @param result the string to receive the rules, may be NULL
551
 * @param resultCapacity the capacity of result, may be 0 if result is NULL
552
 * @param escapeUnprintable if true then convert unprintable
553
 * characters to their hex escape representations, \\uxxxx or
554
 * \\Uxxxxxxxx.  Unprintable characters are those other than
555
 * U+000A, U+0020..U+007E.
556
 * @param ec error code.
557
 * @return length of string, possibly larger than resultCapacity
558
 * @stable ICU 2.4
559
 */
560
U_CAPI int32_t U_EXPORT2
561
uset_toPattern(const USet* set,
562
               UChar* result, int32_t resultCapacity,
563
               UBool escapeUnprintable,
564
               UErrorCode* ec);
565
566
/**
567
 * Adds the given character to the given USet.  After this call,
568
 * uset_contains(set, c) will return true.
569
 * A frozen set will not be modified.
570
 * @param set the object to which to add the character
571
 * @param c the character to add
572
 * @stable ICU 2.4
573
 */
574
U_CAPI void U_EXPORT2
575
uset_add(USet* set, UChar32 c);
576
577
/**
578
 * Adds all of the elements in the specified set to this set if
579
 * they're not already present.  This operation effectively
580
 * modifies this set so that its value is the <i>union</i> of the two
581
 * sets.  The behavior of this operation is unspecified if the specified
582
 * collection is modified while the operation is in progress.
583
 * A frozen set will not be modified.
584
 *
585
 * @param set the object to which to add the set
586
 * @param additionalSet the source set whose elements are to be added to this set.
587
 * @stable ICU 2.6
588
 */
589
U_CAPI void U_EXPORT2
590
uset_addAll(USet* set, const USet *additionalSet);
591
592
/**
593
 * Adds the given range of characters to the given USet.  After this call,
594
 * uset_contains(set, c) will return true for every c in start..end.
595
 * A frozen set will not be modified.
596
 * @param set the object to which to add the range
597
 * @param start the first character of the range to add, inclusive
598
 * @param end the last character of the range to add, inclusive
599
 * @stable ICU 2.2
600
 */
601
U_CAPI void U_EXPORT2
602
uset_addRange(USet* set, UChar32 start, UChar32 end);
603
604
/**
605
 * Adds the given string to the given USet.  After this call,
606
 * uset_containsString(set, str, strLen) will return true.
607
 * A frozen set will not be modified.
608
 * @param set the object to which to add the string
609
 * @param str the string to add
610
 * @param strLen the length of the string or -1 if null terminated.
611
 * @stable ICU 2.4
612
 */
613
U_CAPI void U_EXPORT2
614
uset_addString(USet* set, const UChar* str, int32_t strLen);
615
616
/**
617
 * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
618
 * If this set already contains any particular character, it has no effect on that character.
619
 * A frozen set will not be modified.
620
 * @param set the object to which to add the character
621
 * @param str the source string
622
 * @param strLen the length of the string or -1 if null terminated.
623
 * @stable ICU 3.4
624
 */
625
U_CAPI void U_EXPORT2
626
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
627
628
/**
629
 * Removes the given character from the given USet.  After this call,
630
 * uset_contains(set, c) will return false.
631
 * A frozen set will not be modified.
632
 * @param set the object from which to remove the character
633
 * @param c the character to remove
634
 * @stable ICU 2.4
635
 */
636
U_CAPI void U_EXPORT2
637
uset_remove(USet* set, UChar32 c);
638
639
/**
640
 * Removes the given range of characters from the given USet.  After this call,
641
 * uset_contains(set, c) will return false for every c in start..end.
642
 * A frozen set will not be modified.
643
 * @param set the object from which to remove the range
644
 * @param start the first character of the range to remove, inclusive
645
 * @param end the last character of the range to remove, inclusive
646
 * @stable ICU 2.2
647
 */
648
U_CAPI void U_EXPORT2
649
uset_removeRange(USet* set, UChar32 start, UChar32 end);
650
651
/**
652
 * Removes the given string from the given USet.  After this call,
653
 * uset_containsString(set, str, strLen) will return false.
654
 * A frozen set will not be modified.
655
 * @param set the object from which to remove the string
656
 * @param str the string to remove
657
 * @param strLen the length of the string or -1 if null terminated.
658
 * @stable ICU 2.4
659
 */
660
U_CAPI void U_EXPORT2
661
uset_removeString(USet* set, const UChar* str, int32_t strLen);
662
663
/**
664
 * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
665
 * A frozen set will not be modified.
666
 *
667
 * @param set the object to be modified
668
 * @param str the string
669
 * @param length the length of the string, or -1 if NUL-terminated
670
 * @stable ICU 69
671
 */
672
U_CAPI void U_EXPORT2
673
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
674
675
/**
676
 * Removes from this set all of its elements that are contained in the
677
 * specified set.  This operation effectively modifies this
678
 * set so that its value is the <i>asymmetric set difference</i> of
679
 * the two sets.
680
 * A frozen set will not be modified.
681
 * @param set the object from which the elements are to be removed
682
 * @param removeSet the object that defines which elements will be
683
 * removed from this set
684
 * @stable ICU 3.2
685
 */
686
U_CAPI void U_EXPORT2
687
uset_removeAll(USet* set, const USet* removeSet);
688
689
/**
690
 * Retain only the elements in this set that are contained in the
691
 * specified range.  If <code>start > end</code> then an empty range is
692
 * retained, leaving the set empty.  This is equivalent to
693
 * a boolean logic AND, or a set INTERSECTION.
694
 * A frozen set will not be modified.
695
 *
696
 * @param set the object for which to retain only the specified range
697
 * @param start first character, inclusive, of range
698
 * @param end last character, inclusive, of range
699
 * @stable ICU 3.2
700
 */
701
U_CAPI void U_EXPORT2
702
uset_retain(USet* set, UChar32 start, UChar32 end);
703
704
/**
705
 * Retains only the specified string from this set if it is present.
706
 * Upon return this set will be empty if it did not contain s, or
707
 * will only contain s if it did contain s.
708
 * A frozen set will not be modified.
709
 *
710
 * @param set the object to be modified
711
 * @param str the string
712
 * @param length the length of the string, or -1 if NUL-terminated
713
 * @stable ICU 69
714
 */
715
U_CAPI void U_EXPORT2
716
uset_retainString(USet *set, const UChar *str, int32_t length);
717
718
/**
719
 * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
720
 * A frozen set will not be modified.
721
 *
722
 * @param set the object to be modified
723
 * @param str the string
724
 * @param length the length of the string, or -1 if NUL-terminated
725
 * @stable ICU 69
726
 */
727
U_CAPI void U_EXPORT2
728
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
729
730
/**
731
 * Retains only the elements in this set that are contained in the
732
 * specified set.  In other words, removes from this set all of
733
 * its elements that are not contained in the specified set.  This
734
 * operation effectively modifies this set so that its value is
735
 * the <i>intersection</i> of the two sets.
736
 * A frozen set will not be modified.
737
 *
738
 * @param set the object on which to perform the retain
739
 * @param retain set that defines which elements this set will retain
740
 * @stable ICU 3.2
741
 */
742
U_CAPI void U_EXPORT2
743
uset_retainAll(USet* set, const USet* retain);
744
745
/**
746
 * Reallocate this object's internal structures to take up the least
747
 * possible space, without changing this object's value.
748
 * A frozen set will not be modified.
749
 *
750
 * @param set the object on which to perform the compact
751
 * @stable ICU 3.2
752
 */
753
U_CAPI void U_EXPORT2
754
uset_compact(USet* set);
755
756
/**
757
 * This is equivalent to
758
 * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
759
 *
760
 * <strong>Note:</strong> This performs a symmetric difference with all code points
761
 * <em>and thus retains all multicharacter strings</em>.
762
 * In order to achieve a “code point complement” (all code points minus this set),
763
 * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
764
 *
765
 * A frozen set will not be modified.
766
 * @param set the set
767
 * @stable ICU 2.4
768
 */
769
U_CAPI void U_EXPORT2
770
uset_complement(USet* set);
771
772
/**
773
 * Complements the specified range in this set.  Any character in
774
 * the range will be removed if it is in this set, or will be
775
 * added if it is not in this set.  If <code>start > end</code>
776
 * then an empty range is complemented, leaving the set unchanged.
777
 * This is equivalent to a boolean logic XOR.
778
 * A frozen set will not be modified.
779
 *
780
 * @param set the object to be modified
781
 * @param start first character, inclusive, of range
782
 * @param end last character, inclusive, of range
783
 * @stable ICU 69
784
 */
785
U_CAPI void U_EXPORT2
786
uset_complementRange(USet *set, UChar32 start, UChar32 end);
787
788
/**
789
 * Complements the specified string in this set.
790
 * The string will be removed if it is in this set, or will be added if it is not in this set.
791
 * A frozen set will not be modified.
792
 *
793
 * @param set the object to be modified
794
 * @param str the string
795
 * @param length the length of the string, or -1 if NUL-terminated
796
 * @stable ICU 69
797
 */
798
U_CAPI void U_EXPORT2
799
uset_complementString(USet *set, const UChar *str, int32_t length);
800
801
/**
802
 * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
803
 * A frozen set will not be modified.
804
 *
805
 * @param set the object to be modified
806
 * @param str the string
807
 * @param length the length of the string, or -1 if NUL-terminated
808
 * @stable ICU 69
809
 */
810
U_CAPI void U_EXPORT2
811
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
812
813
/**
814
 * Complements in this set all elements contained in the specified
815
 * set.  Any character in the other set will be removed if it is
816
 * in this set, or will be added if it is not in this set.
817
 * A frozen set will not be modified.
818
 *
819
 * @param set the set with which to complement
820
 * @param complement set that defines which elements will be xor'ed
821
 * from this set.
822
 * @stable ICU 3.2
823
 */
824
U_CAPI void U_EXPORT2
825
uset_complementAll(USet* set, const USet* complement);
826
827
/**
828
 * Removes all of the elements from this set.  This set will be
829
 * empty after this call returns.
830
 * A frozen set will not be modified.
831
 * @param set the set
832
 * @stable ICU 2.4
833
 */
834
U_CAPI void U_EXPORT2
835
uset_clear(USet* set);
836
837
/**
838
 * Close this set over the given attribute.  For the attribute
839
 * USET_CASE_INSENSITIVE, the result is to modify this set so that:
840
 *
841
 * 1. For each character or string 'a' in this set, all strings or
842
 * characters 'b' such that foldCase(a) == foldCase(b) are added
843
 * to this set.
844
 *
845
 * 2. For each string 'e' in the resulting set, if e !=
846
 * foldCase(e), 'e' will be removed.
847
 *
848
 * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
849
 *
850
 * (Here foldCase(x) refers to the operation u_strFoldCase, and a
851
 * == b denotes that the contents are the same, not pointer
852
 * comparison.)
853
 *
854
 * A frozen set will not be modified.
855
 *
856
 * @param set the set
857
 *
858
 * @param attributes bitmask for attributes to close over.
859
 * Valid options:
860
 * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
861
 * These case options are mutually exclusive.
862
 * Unrelated option bits are ignored.
863
 * @stable ICU 4.2
864
 */
865
U_CAPI void U_EXPORT2
866
uset_closeOver(USet* set, int32_t attributes);
867
868
/**
869
 * Remove all strings from this set.
870
 *
871
 * @param set the set
872
 * @stable ICU 4.2
873
 */
874
U_CAPI void U_EXPORT2
875
uset_removeAllStrings(USet* set);
876
877
/**
878
 * Returns true if the given USet contains no characters and no
879
 * strings.
880
 * @param set the set
881
 * @return true if set is empty
882
 * @stable ICU 2.4
883
 */
884
U_CAPI UBool U_EXPORT2
885
uset_isEmpty(const USet* set);
886
887
/**
888
 * @param set the set
889
 * @return true if this set contains multi-character strings or the empty string.
890
 * @stable ICU 70
891
 */
892
U_CAPI UBool U_EXPORT2
893
uset_hasStrings(const USet *set);
894
895
/**
896
 * Returns true if the given USet contains the given character.
897
 * This function works faster with a frozen set.
898
 * @param set the set
899
 * @param c The codepoint to check for within the set
900
 * @return true if set contains c
901
 * @stable ICU 2.4
902
 */
903
U_CAPI UBool U_EXPORT2
904
uset_contains(const USet* set, UChar32 c);
905
906
/**
907
 * Returns true if the given USet contains all characters c
908
 * where start <= c && c <= end.
909
 * @param set the set
910
 * @param start the first character of the range to test, inclusive
911
 * @param end the last character of the range to test, inclusive
912
 * @return true if set contains the range
913
 * @stable ICU 2.2
914
 */
915
U_CAPI UBool U_EXPORT2
916
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
917
918
/**
919
 * Returns true if the given USet contains the given string.
920
 * @param set the set
921
 * @param str the string
922
 * @param strLen the length of the string or -1 if null terminated.
923
 * @return true if set contains str
924
 * @stable ICU 2.4
925
 */
926
U_CAPI UBool U_EXPORT2
927
uset_containsString(const USet* set, const UChar* str, int32_t strLen);
928
929
/**
930
 * Returns the index of the given character within this set, where
931
 * the set is ordered by ascending code point.  If the character
932
 * is not in this set, return -1.  The inverse of this method is
933
 * <code>charAt()</code>.
934
 * @param set the set
935
 * @param c the character to obtain the index for
936
 * @return an index from 0..size()-1, or -1
937
 * @stable ICU 3.2
938
 */
939
U_CAPI int32_t U_EXPORT2
940
uset_indexOf(const USet* set, UChar32 c);
941
942
/**
943
 * Returns the character at the given index within this set, where
944
 * the set is ordered by ascending code point.  If the index is
945
 * out of range for characters, returns (UChar32)-1.
946
 * The inverse of this method is <code>indexOf()</code>.
947
 *
948
 * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
949
 * with uset_getItem(), because for each call it skips linearly over <code>index</code>
950
 * characters in the ranges.
951
 *
952
 * @param set the set
953
 * @param charIndex an index from 0..size()-1 to obtain the char for
954
 * @return the character at the given index, or (UChar32)-1.
955
 * @stable ICU 3.2
956
 */
957
U_CAPI UChar32 U_EXPORT2
958
uset_charAt(const USet* set, int32_t charIndex);
959
960
/**
961
 * Returns the number of characters and strings contained in this set.
962
 * The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings.
963
 *
964
 * This is slower than uset_getRangeCount() and uset_getItemCount() because
965
 * it counts the code points of all ranges.
966
 *
967
 * @param set the set
968
 * @return a non-negative integer counting the characters and strings
969
 * contained in set
970
 * @stable ICU 2.4
971
 * @see uset_getRangeCount
972
 * @see uset_getStringCount
973
 * @see uset_getItemCount
974
 */
975
U_CAPI int32_t U_EXPORT2
976
uset_size(const USet* set);
977
978
/**
979
 * @param set the set
980
 * @return the number of ranges in this set.
981
 * @stable ICU 70
982
 * @see uset_getItemCount
983
 * @see uset_getItem
984
 * @see uset_getStringCount
985
 * @see uset_size
986
 */
987
U_CAPI int32_t U_EXPORT2
988
uset_getRangeCount(const USet *set);
989
990
#ifndef U_HIDE_DRAFT_API
991
992
/**
993
 * @param set the set
994
 * @return the number of strings in this set.
995
 * @draft ICU 76
996
 * @see uset_getRangeCount
997
 * @see uset_getItemCount
998
 * @see uset_size
999
 */
1000
U_CAPI int32_t U_EXPORT2
1001
uset_getStringCount(const USet *set);
1002
1003
/**
1004
 * Returns the index-th string (empty or multi-character) in the set.
1005
 * The string may not be NUL-terminated.
1006
 * The output length must be used, and the caller must not read more than that many UChars.
1007
 *
1008
 * @param set the set
1009
 * @param index the string index, 0 .. uset_getStringCount() - 1
1010
 * @param pLength the output string length; must not be NULL
1011
 * @return the pointer to the string; NULL if the index is out of range or pLength is NULL
1012
 * @draft ICU 76
1013
 * @see uset_getStringCount
1014
 */
1015
U_CAPI const UChar* U_EXPORT2
1016
uset_getString(const USet *set, int32_t index, int32_t *pLength);
1017
1018
#endif  // U_HIDE_DRAFT_API
1019
1020
/**
1021
 * Returns the number of items in this set.  An item is either a range
1022
 * of characters or a single multicharacter string.
1023
 * @param set the set
1024
 * @return a non-negative integer counting the character ranges
1025
 * and/or strings contained in set
1026
 * @stable ICU 2.4
1027
 * @see uset_getRangeCount
1028
 * @see uset_getStringCount
1029
 */
1030
U_CAPI int32_t U_EXPORT2
1031
uset_getItemCount(const USet* set);
1032
1033
/**
1034
 * Returns an item of this set.  An item is either a range of
1035
 * characters or a single multicharacter string (which can be the empty string).
1036
 *
1037
 * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
1038
 * and the range is <code>*start</code>..<code>*end</code>.
1039
 *
1040
 * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
1041
 * this function copies the string into <code>str[strCapacity]</code> and
1042
 * returns the length of the string (0 for the empty string).
1043
 * See uset_getString() for a function that does not copy the string contents.
1044
 *
1045
 * If <code>itemIndex</code> is out of range, then this function returns -1.
1046
 *
1047
 * Note that 0 is returned for each range as well as for the empty string.
1048
 *
1049
 * @param set the set
1050
 * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
1051
 * @param start pointer to variable to receive first character in range, inclusive;
1052
 *              can be NULL for a string item
1053
 * @param end pointer to variable to receive last character in range, inclusive;
1054
 *            can be NULL for a string item
1055
 * @param str buffer to receive the string, may be NULL
1056
 * @param strCapacity capacity of str, or 0 if str is NULL
1057
 * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
1058
 * @return the length of the string (0 or >= 2), or 0 if the item is a range,
1059
 *         or -1 if the itemIndex is out of range
1060
 * @stable ICU 2.4
1061
 * @see uset_getString
1062
 */
1063
U_CAPI int32_t U_EXPORT2
1064
uset_getItem(const USet* set, int32_t itemIndex,
1065
             UChar32* start, UChar32* end,
1066
             UChar* str, int32_t strCapacity,
1067
             UErrorCode* ec);
1068
1069
/**
1070
 * Returns true if set1 contains all the characters and strings
1071
 * of set2. It answers the question, 'Is set1 a superset of set2?'
1072
 * @param set1 set to be checked for containment
1073
 * @param set2 set to be checked for containment
1074
 * @return true if the test condition is met
1075
 * @stable ICU 3.2
1076
 */
1077
U_CAPI UBool U_EXPORT2
1078
uset_containsAll(const USet* set1, const USet* set2);
1079
1080
/**
1081
 * Returns true if this set contains all the characters
1082
 * of the given string. This does not check containment of grapheme
1083
 * clusters, like uset_containsString.
1084
 * @param set set of characters to be checked for containment
1085
 * @param str string containing codepoints to be checked for containment
1086
 * @param strLen the length of the string or -1 if null terminated.
1087
 * @return true if the test condition is met
1088
 * @stable ICU 3.4
1089
 */
1090
U_CAPI UBool U_EXPORT2
1091
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1092
1093
/**
1094
 * Returns true if set1 contains none of the characters and strings
1095
 * of set2. It answers the question, 'Are set1 and set2 disjoint?'
1096
 * @param set1 set to be checked for containment
1097
 * @param set2 set to be checked for containment
1098
 * @return true if the test condition is met
1099
 * @stable ICU 3.2
1100
 */
1101
U_CAPI UBool U_EXPORT2
1102
uset_containsNone(const USet* set1, const USet* set2);
1103
1104
/**
1105
 * Returns true if set1 contains some of the characters and strings
1106
 * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
1107
 * @param set1 set to be checked for containment
1108
 * @param set2 set to be checked for containment
1109
 * @return true if the test condition is met
1110
 * @stable ICU 3.2
1111
 */
1112
U_CAPI UBool U_EXPORT2
1113
uset_containsSome(const USet* set1, const USet* set2);
1114
1115
/**
1116
 * Returns the length of the initial substring of the input string which
1117
 * consists only of characters and strings that are contained in this set
1118
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1119
 * or only of characters and strings that are not contained
1120
 * in this set (USET_SPAN_NOT_CONTAINED).
1121
 * See USetSpanCondition for details.
1122
 * Similar to the strspn() C library function.
1123
 * Unpaired surrogates are treated according to contains() of their surrogate code points.
1124
 * This function works faster with a frozen set and with a non-negative string length argument.
1125
 * @param set the set
1126
 * @param s start of the string
1127
 * @param length of the string; can be -1 for NUL-terminated
1128
 * @param spanCondition specifies the containment condition
1129
 * @return the length of the initial substring according to the spanCondition;
1130
 *         0 if the start of the string does not fit the spanCondition
1131
 * @stable ICU 3.8
1132
 * @see USetSpanCondition
1133
 */
1134
U_CAPI int32_t U_EXPORT2
1135
uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1136
1137
/**
1138
 * Returns the start of the trailing substring of the input string which
1139
 * consists only of characters and strings that are contained in this set
1140
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1141
 * or only of characters and strings that are not contained
1142
 * in this set (USET_SPAN_NOT_CONTAINED).
1143
 * See USetSpanCondition for details.
1144
 * Unpaired surrogates are treated according to contains() of their surrogate code points.
1145
 * This function works faster with a frozen set and with a non-negative string length argument.
1146
 * @param set the set
1147
 * @param s start of the string
1148
 * @param length of the string; can be -1 for NUL-terminated
1149
 * @param spanCondition specifies the containment condition
1150
 * @return the start of the trailing substring according to the spanCondition;
1151
 *         the string length if the end of the string does not fit the spanCondition
1152
 * @stable ICU 3.8
1153
 * @see USetSpanCondition
1154
 */
1155
U_CAPI int32_t U_EXPORT2
1156
uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1157
1158
/**
1159
 * Returns the length of the initial substring of the input string which
1160
 * consists only of characters and strings that are contained in this set
1161
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1162
 * or only of characters and strings that are not contained
1163
 * in this set (USET_SPAN_NOT_CONTAINED).
1164
 * See USetSpanCondition for details.
1165
 * Similar to the strspn() C library function.
1166
 * Malformed byte sequences are treated according to contains(0xfffd).
1167
 * This function works faster with a frozen set and with a non-negative string length argument.
1168
 * @param set the set
1169
 * @param s start of the string (UTF-8)
1170
 * @param length of the string; can be -1 for NUL-terminated
1171
 * @param spanCondition specifies the containment condition
1172
 * @return the length of the initial substring according to the spanCondition;
1173
 *         0 if the start of the string does not fit the spanCondition
1174
 * @stable ICU 3.8
1175
 * @see USetSpanCondition
1176
 */
1177
U_CAPI int32_t U_EXPORT2
1178
uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1179
1180
/**
1181
 * Returns the start of the trailing substring of the input string which
1182
 * consists only of characters and strings that are contained in this set
1183
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1184
 * or only of characters and strings that are not contained
1185
 * in this set (USET_SPAN_NOT_CONTAINED).
1186
 * See USetSpanCondition for details.
1187
 * Malformed byte sequences are treated according to contains(0xfffd).
1188
 * This function works faster with a frozen set and with a non-negative string length argument.
1189
 * @param set the set
1190
 * @param s start of the string (UTF-8)
1191
 * @param length of the string; can be -1 for NUL-terminated
1192
 * @param spanCondition specifies the containment condition
1193
 * @return the start of the trailing substring according to the spanCondition;
1194
 *         the string length if the end of the string does not fit the spanCondition
1195
 * @stable ICU 3.8
1196
 * @see USetSpanCondition
1197
 */
1198
U_CAPI int32_t U_EXPORT2
1199
uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1200
1201
/**
1202
 * Returns true if set1 contains all of the characters and strings
1203
 * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
1204
 * @param set1 set to be checked for containment
1205
 * @param set2 set to be checked for containment
1206
 * @return true if the test condition is met
1207
 * @stable ICU 3.2
1208
 */
1209
U_CAPI UBool U_EXPORT2
1210
uset_equals(const USet* set1, const USet* set2);
1211
1212
/*********************************************************************
1213
 * Serialized set API
1214
 *********************************************************************/
1215
1216
/**
1217
 * Serializes this set into an array of 16-bit integers.  Serialization
1218
 * (currently) only records the characters in the set; multicharacter
1219
 * strings are ignored.
1220
 *
1221
 * The array
1222
 * has the following format (each line is one 16-bit integer):
1223
 *
1224
 *  length     = (n+2*m) | (m!=0?0x8000:0)
1225
 *  bmpLength  = n; present if m!=0
1226
 *  bmp[0]
1227
 *  bmp[1]
1228
 *  ...
1229
 *  bmp[n-1]
1230
 *  supp-high[0]
1231
 *  supp-low[0]
1232
 *  supp-high[1]
1233
 *  supp-low[1]
1234
 *  ...
1235
 *  supp-high[m-1]
1236
 *  supp-low[m-1]
1237
 *
1238
 * The array starts with a header.  After the header are n bmp
1239
 * code points, then m supplementary code points.  Either n or m
1240
 * or both may be zero.  n+2*m is always <= 0x7FFF.
1241
 *
1242
 * If there are no supplementary characters (if m==0) then the
1243
 * header is one 16-bit integer, 'length', with value n.
1244
 *
1245
 * If there are supplementary characters (if m!=0) then the header
1246
 * is two 16-bit integers.  The first, 'length', has value
1247
 * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
1248
 *
1249
 * After the header the code points are stored in ascending order.
1250
 * Supplementary code points are stored as most significant 16
1251
 * bits followed by least significant 16 bits.
1252
 *
1253
 * @param set the set
1254
 * @param dest pointer to buffer of destCapacity 16-bit integers.
1255
 * May be NULL only if destCapacity is zero.
1256
 * @param destCapacity size of dest, or zero.  Must not be negative.
1257
 * @param pErrorCode pointer to the error code.  Will be set to
1258
 * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
1259
 * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
1260
 * @return the total length of the serialized format, including
1261
 * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
1262
 * than U_BUFFER_OVERFLOW_ERROR.
1263
 * @stable ICU 2.4
1264
 */
1265
U_CAPI int32_t U_EXPORT2
1266
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1267
1268
/**
1269
 * Given a serialized array, fill in the given serialized set object.
1270
 * @param fillSet pointer to result
1271
 * @param src pointer to start of array
1272
 * @param srcLength length of array
1273
 * @return true if the given array is valid, otherwise false
1274
 * @stable ICU 2.4
1275
 */
1276
U_CAPI UBool U_EXPORT2
1277
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1278
1279
/**
1280
 * Set the USerializedSet to contain the given character (and nothing
1281
 * else).
1282
 * @param fillSet pointer to result
1283
 * @param c The codepoint to set
1284
 * @stable ICU 2.4
1285
 */
1286
U_CAPI void U_EXPORT2
1287
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1288
1289
/**
1290
 * Returns true if the given USerializedSet contains the given
1291
 * character.
1292
 * @param set the serialized set
1293
 * @param c The codepoint to check for within the set
1294
 * @return true if set contains c
1295
 * @stable ICU 2.4
1296
 */
1297
U_CAPI UBool U_EXPORT2
1298
uset_serializedContains(const USerializedSet* set, UChar32 c);
1299
1300
/**
1301
 * Returns the number of disjoint ranges of characters contained in
1302
 * the given serialized set.  Ignores any strings contained in the
1303
 * set.
1304
 * @param set the serialized set
1305
 * @return a non-negative integer counting the character ranges
1306
 * contained in set
1307
 * @stable ICU 2.4
1308
 */
1309
U_CAPI int32_t U_EXPORT2
1310
uset_getSerializedRangeCount(const USerializedSet* set);
1311
1312
/**
1313
 * Returns a range of characters contained in the given serialized
1314
 * set.
1315
 * @param set the serialized set
1316
 * @param rangeIndex a non-negative integer in the range 0..
1317
 * uset_getSerializedRangeCount(set)-1
1318
 * @param pStart pointer to variable to receive first character
1319
 * in range, inclusive
1320
 * @param pEnd pointer to variable to receive last character in range,
1321
 * inclusive
1322
 * @return true if rangeIndex is valid, otherwise false
1323
 * @stable ICU 2.4
1324
 */
1325
U_CAPI UBool U_EXPORT2
1326
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1327
                        UChar32* pStart, UChar32* pEnd);
1328
1329
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1330
#ifndef U_HIDE_DRAFT_API
1331
1332
namespace U_HEADER_ONLY_NAMESPACE {
1333
1334
// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1335
// not intended to be used via export from the ICU DLL.
1336
1337
/**
1338
 * Iterator returned by USetCodePoints.
1339
 * @draft ICU 76
1340
 */
1341
class USetCodePointIterator {
1342
public:
1343
    /** @draft ICU 76 */
1344
    USetCodePointIterator(const USetCodePointIterator &other) = default;
1345
1346
    /** @draft ICU 76 */
1347
0
    bool operator==(const USetCodePointIterator &other) const {
1348
0
        // No need to compare rangeCount & end given private constructor
1349
0
        // and assuming we don't compare iterators across the set being modified.
1350
0
        // And comparing rangeIndex is redundant with comparing c.
1351
0
        // We might even skip comparing uset.
1352
0
        // Unless we want operator==() to be "correct" for more than iteration.
1353
0
        return uset == other.uset && c == other.c;
1354
0
    }
1355
1356
    /** @draft ICU 76 */
1357
0
    bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1358
1359
    /** @draft ICU 76 */
1360
0
    UChar32 operator*() const { return c; }
1361
1362
    /**
1363
     * Pre-increment.
1364
     * @draft ICU 76
1365
     */
1366
0
    USetCodePointIterator &operator++() {
1367
0
        if (c < end) {
1368
0
            ++c;
1369
0
        } else if (rangeIndex < rangeCount) {
1370
0
            UErrorCode errorCode = U_ZERO_ERROR;
1371
0
            int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1372
0
            if (U_SUCCESS(errorCode) && result == 0) {
1373
0
                ++rangeIndex;
1374
0
            } else {
1375
0
                c = end = U_SENTINEL;
1376
0
            }
1377
0
        } else {
1378
0
            c = end = U_SENTINEL;
1379
0
        }
1380
0
        return *this;
1381
0
    }
1382
1383
    /**
1384
     * Post-increment.
1385
     * @draft ICU 76
1386
     */
1387
0
    USetCodePointIterator operator++(int) {
1388
0
        USetCodePointIterator result(*this);
1389
0
        operator++();
1390
0
        return result;
1391
0
    }
1392
1393
private:
1394
    friend class USetCodePoints;
1395
1396
    USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1397
            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
1398
0
                c(U_SENTINEL), end(U_SENTINEL) {
1399
0
        // Fetch the first range.
1400
0
        operator++();
1401
0
    }
1402
1403
    const USet *uset;
1404
    int32_t rangeIndex;
1405
    int32_t rangeCount;
1406
    UChar32 c, end;
1407
};
1408
1409
/**
1410
 * C++ "range" for iterating over the code points of a USet.
1411
 *
1412
 * \code
1413
 * using U_HEADER_NESTED_NAMESPACE::USetCodePoints;
1414
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
1415
 * for (UChar32 c : USetCodePoints(uset.getAlias())) {
1416
 *     printf("uset.codePoint U+%04lx\n", (long)c);
1417
 * }
1418
 * \endcode
1419
 *
1420
 * C++ UnicodeSet has member functions for iteration, including codePoints().
1421
 *
1422
 * @draft ICU 76
1423
 * @see USetRanges
1424
 * @see USetStrings
1425
 * @see USetElements
1426
 */
1427
class USetCodePoints {
1428
public:
1429
    /**
1430
     * Constructs a C++ "range" object over the code points of the USet.
1431
     * @draft ICU 76
1432
     */
1433
0
    USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1434
1435
    /** @draft ICU 76 */
1436
    USetCodePoints(const USetCodePoints &other) = default;
1437
1438
    /** @draft ICU 76 */
1439
0
    USetCodePointIterator begin() const {
1440
0
        return USetCodePointIterator(uset, 0, rangeCount);
1441
0
    }
1442
1443
    /** @draft ICU 76 */
1444
0
    USetCodePointIterator end() const {
1445
0
        return USetCodePointIterator(uset, rangeCount, rangeCount);
1446
0
    }
1447
1448
private:
1449
    const USet *uset;
1450
    int32_t rangeCount;
1451
};
1452
1453
/**
1454
 * A contiguous range of code points in a USet/UnicodeSet.
1455
 * Returned by USetRangeIterator which is returned by USetRanges.
1456
 * Both the rangeStart and rangeEnd are in the range.
1457
 * (end() returns an iterator corresponding to rangeEnd+1.)
1458
 * @draft ICU 76
1459
 */
1460
struct CodePointRange {
1461
    /** @draft ICU 76 */
1462
    struct iterator {
1463
        /** @draft ICU 76 */
1464
0
        iterator(UChar32 aC) : c(aC) {}
1465
1466
        /** @draft ICU 76 */
1467
0
        bool operator==(const iterator &other) const { return c == other.c; }
1468
        /** @draft ICU 76 */
1469
0
        bool operator!=(const iterator &other) const { return !operator==(other); }
1470
1471
        /** @draft ICU 76 */
1472
0
        UChar32 operator*() const { return c; }
1473
1474
        /**
1475
         * Pre-increment.
1476
         * @draft ICU 76
1477
         */
1478
0
        iterator &operator++() {
1479
0
            ++c;
1480
0
            return *this;
1481
0
        }
1482
1483
        /**
1484
         * Post-increment.
1485
         * @draft ICU 76
1486
         */
1487
0
        iterator operator++(int) {
1488
0
            return c++;
1489
0
        }
1490
1491
        /**
1492
         * The current code point in the range.
1493
         * @draft ICU 76
1494
         */
1495
        UChar32 c;
1496
    };
1497
1498
    /** @draft ICU 76 */
1499
0
    CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
1500
    /** @draft ICU 76 */
1501
    CodePointRange(const CodePointRange &other) = default;
1502
    /** @draft ICU 76 */
1503
0
    size_t size() const { return (rangeEnd + 1) - rangeStart; }
1504
    /** @draft ICU 76 */
1505
0
    iterator begin() const { return rangeStart; }
1506
    /** @draft ICU 76 */
1507
0
    iterator end() const { return rangeEnd + 1; }
1508
1509
    /**
1510
     * Start of a USet/UnicodeSet range of code points.
1511
     * @draft ICU 76
1512
     */
1513
    UChar32 rangeStart;
1514
    /**
1515
     * Inclusive end of a USet/UnicodeSet range of code points.
1516
     * @draft ICU 76
1517
     */
1518
    UChar32 rangeEnd;
1519
};
1520
1521
/**
1522
 * Iterator returned by USetRanges.
1523
 * @draft ICU 76
1524
 */
1525
class USetRangeIterator {
1526
public:
1527
    /** @draft ICU 76 */
1528
    USetRangeIterator(const USetRangeIterator &other) = default;
1529
1530
    /** @draft ICU 76 */
1531
0
    bool operator==(const USetRangeIterator &other) const {
1532
0
        // No need to compare rangeCount given private constructor
1533
0
        // and assuming we don't compare iterators across the set being modified.
1534
0
        // We might even skip comparing uset.
1535
0
        // Unless we want operator==() to be "correct" for more than iteration.
1536
0
        return uset == other.uset && rangeIndex == other.rangeIndex;
1537
0
    }
1538
1539
    /** @draft ICU 76 */
1540
0
    bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1541
1542
    /** @draft ICU 76 */
1543
0
    CodePointRange operator*() const {
1544
0
        if (rangeIndex < rangeCount) {
1545
0
            UChar32 start, end;
1546
0
            UErrorCode errorCode = U_ZERO_ERROR;
1547
0
            int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1548
0
            if (U_SUCCESS(errorCode) && result == 0) {
1549
0
                return CodePointRange(start, end);
1550
0
            }
1551
0
        }
1552
0
        return CodePointRange(U_SENTINEL, U_SENTINEL);
1553
0
    }
1554
1555
    /**
1556
     * Pre-increment.
1557
     * @draft ICU 76
1558
     */
1559
0
    USetRangeIterator &operator++() {
1560
0
        ++rangeIndex;
1561
0
        return *this;
1562
0
    }
1563
1564
    /**
1565
     * Post-increment.
1566
     * @draft ICU 76
1567
     */
1568
0
    USetRangeIterator operator++(int) {
1569
0
        USetRangeIterator result(*this);
1570
0
        ++rangeIndex;
1571
0
        return result;
1572
0
    }
1573
1574
private:
1575
    friend class USetRanges;
1576
1577
    USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1578
0
            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}
1579
1580
    const USet *uset;
1581
    int32_t rangeIndex;
1582
    int32_t rangeCount;
1583
};
1584
1585
/**
1586
 * C++ "range" for iterating over the code point ranges of a USet.
1587
 *
1588
 * \code
1589
 * using U_HEADER_NESTED_NAMESPACE::USetRanges;
1590
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
1591
 * for (auto [start, end] : USetRanges(uset.getAlias())) {
1592
 *     printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end);
1593
 * }
1594
 * for (auto range : USetRanges(uset.getAlias())) {
1595
 *     for (UChar32 c : range) {
1596
 *         printf("uset.range.c U+%04lx\n", (long)c);
1597
 *     }
1598
 * }
1599
 * \endcode
1600
 *
1601
 * C++ UnicodeSet has member functions for iteration, including ranges().
1602
 *
1603
 * @draft ICU 76
1604
 * @see USetCodePoints
1605
 * @see USetStrings
1606
 * @see USetElements
1607
 */
1608
class USetRanges {
1609
public:
1610
    /**
1611
     * Constructs a C++ "range" object over the code point ranges of the USet.
1612
     * @draft ICU 76
1613
     */
1614
0
    USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1615
1616
    /** @draft ICU 76 */
1617
    USetRanges(const USetRanges &other) = default;
1618
1619
    /** @draft ICU 76 */
1620
0
    USetRangeIterator begin() const {
1621
0
        return USetRangeIterator(uset, 0, rangeCount);
1622
0
    }
1623
1624
    /** @draft ICU 76 */
1625
0
    USetRangeIterator end() const {
1626
0
        return USetRangeIterator(uset, rangeCount, rangeCount);
1627
0
    }
1628
1629
private:
1630
    const USet *uset;
1631
    int32_t rangeCount;
1632
};
1633
1634
/**
1635
 * Iterator returned by USetStrings.
1636
 * @draft ICU 76
1637
 */
1638
class USetStringIterator {
1639
public:
1640
    /** Copy constructor (defaulted). @draft ICU 76 */
1641
    USetStringIterator(const USetStringIterator &other) = default;
1642
1643
    /** Two iterators are equal when they hold the same USet pointer and the same string index. @draft ICU 76 */
1644
0
    bool operator==(const USetStringIterator &other) const {
1645
0
        // No need to compare count given private constructor
1646
0
        // and assuming we don't compare iterators across the set being modified.
1647
0
        // We might even skip comparing uset.
1648
0
        // Unless we want operator==() to be "correct" for more than iteration.
1649
0
        return uset == other.uset && index == other.index;
1650
0
    }
1651
1652
    /** Negation of operator==(). @draft ICU 76 */
1653
0
    bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1654
1655
    /** Returns a view of the string that uset_getString() yields for the current index, or an empty view when index >= count. @draft ICU 76 */
1656
0
    std::u16string_view operator*() const {
1657
0
        if (index < count) {
1658
0
            int32_t length;
1659
0
            const UChar *uchars = uset_getString(uset, index, &length);
1660
0
            // assert uchars != nullptr;
1661
0
            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1662
0
        }
1663
0
        return {};
1664
0
    }
1665
1666
    /**
1667
     * Pre-increment: moves to the next string index and returns this iterator.
1668
     * @draft ICU 76
1669
     */
1670
0
    USetStringIterator &operator++() {
1671
0
        ++index;
1672
0
        return *this;
1673
0
    }
1674
1675
    /**
1676
     * Post-increment: moves to the next string index and returns a copy of the iterator as it was before the move.
1677
     * @draft ICU 76
1678
     */
1679
0
    USetStringIterator operator++(int) {
1680
0
        USetStringIterator result(*this);
1681
0
        ++index;
1682
0
        return result;
1683
0
    }
1684
1685
private:
1686
    friend class USetStrings;  // Grants USetStrings access to the private constructor below.
1687
1688
    USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
1689
0
            : uset(pUset), index(nIndex), count(nCount) {}
1690
1691
    const USet *uset;  // Set whose strings are iterated.
1692
    int32_t index;  // Current string index, as passed to uset_getString().
1693
    int32_t count;  // Bound for index: operator*() yields an empty view once index >= count.
1694
};
1695
1696
/**
1697
 * C++ "range" for iterating over the empty and multi-character strings of a USet.
1698
 *
1699
 * \code
1700
 * using U_HEADER_NESTED_NAMESPACE::USetStrings;
1701
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
1702
 * for (auto s : USetStrings(uset.getAlias())) {
1703
 *     int32_t len32 = s.length();
1704
 *     char utf8[200];
1705
 *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
1706
 *                        s.data(), len32, 0xFFFD, nullptr, &errorCode);
1707
 *     printf("uset.string length %ld \"%s\"\n", long{len32}, utf8);
1708
 * }
1709
 * \endcode
1710
 *
1711
 * C++ UnicodeSet has member functions for iteration, including strings().
1712
 *
1713
 * @draft ICU 76
1714
 * @see USetCodePoints
1715
 * @see USetRanges
1716
 * @see USetElements
1717
 */
1718
class USetStrings {
1719
public:
1720
    /**
1721
     * Constructs a C++ "range" object over the strings of the USet; reads uset_getStringCount(pUset) once, here.
1722
     * @draft ICU 76
1723
     */
1724
0
    USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
1725
1726
    /** Copy constructor (defaulted). @draft ICU 76 */
1727
    USetStrings(const USetStrings &other) = default;
1728
1729
    /** Returns an iterator positioned at string index 0. @draft ICU 76 */
1730
0
    USetStringIterator begin() const {
1731
0
        return USetStringIterator(uset, 0, count);
1732
0
    }
1733
1734
    /** Returns the past-the-end iterator, positioned at string index count. @draft ICU 76 */
1735
0
    USetStringIterator end() const {
1736
0
        return USetStringIterator(uset, count, count);
1737
0
    }
1738
1739
private:
1740
    const USet *uset;  // Set whose strings are iterated.
1741
    int32_t count;  // Result of uset_getStringCount(uset), captured by the constructor.
1742
};
1743
#endif  // U_HIDE_DRAFT_API
1744
1745
#ifndef U_HIDE_DRAFT_API
1746
/**
1747
 * Iterator returned by USetElements.
1748
 * @draft ICU 77
1749
 */
1750
class USetElementIterator {
1751
public:
1752
    /** Copy constructor (defaulted). @draft ICU 77 */
1753
    USetElementIterator(const USetElementIterator &other) = default;
1754
1755
    /** Two iterators are equal when they hold the same USet pointer, the same current code point c, and the same index. @draft ICU 77 */
1756
0
    bool operator==(const USetElementIterator &other) const {
1757
0
        // No need to compare rangeCount & end given private constructor
1758
0
        // and assuming we don't compare iterators across the set being modified.
1759
0
        // We might even skip comparing uset.
1760
0
        // Unless we want operator==() to be "correct" for more than iteration.
1761
0
        return uset == other.uset && c == other.c && index == other.index;
1762
0
    }
1763
1764
    /** Negation of operator==(). @draft ICU 77 */
1765
0
    bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1766
1767
    /** Returns the current element as a new std::u16string: the code point c as one or two UTF-16 code units, otherwise the current string, otherwise an empty string. @draft ICU 77 */
1768
0
    std::u16string operator*() const {
1769
0
        if (c >= 0) {  // Positioned on a code point of a range.
1770
0
            return c <= 0xffff ?
1771
0
                std::u16string({static_cast<char16_t>(c)}) :
1772
0
                std::u16string({U16_LEAD(c), U16_TRAIL(c)});
1773
0
        } else if (index < totalCount) {  // Positioned on a string; its string index is index - rangeCount.
1774
0
            int32_t length;
1775
0
            const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1776
0
            // assert uchars != nullptr;
1777
0
            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1778
0
        } else {  // Neither a code point nor index < totalCount: nothing to return.
1779
0
            return {};
1780
0
        }
1781
0
    }
1782
1783
    /**
1784
     * Pre-increment: moves to the next code point of the current range, else loads the next range, else moves on through the strings; returns this iterator.
1785
     * @draft ICU 77
1786
     */
1787
0
    USetElementIterator &operator++() {
1788
0
        if (c < end) {  // More code points remain in the current range.
1789
0
            ++c;
1790
0
        } else if (index < rangeCount) {  // Current range is used up (or none is loaded yet): load item number index.
1791
0
            UErrorCode errorCode = U_ZERO_ERROR;
1792
0
            int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1793
0
            if (U_SUCCESS(errorCode) && result == 0) {
1794
0
                ++index;  // While a range is loaded, index is one more than that range's item index.
1795
0
            } else {
1796
0
                c = end = U_SENTINEL;  // Lookup did not succeed with result 0: leave no current code point.
1797
0
            }
1798
0
        } else if (c >= 0) {
1799
0
            // assert index == rangeCount;
1800
0
            // Switch from the last range to the first string.
1801
0
            c = end = U_SENTINEL;
1802
0
        } else {
1803
0
            ++index;  // No current code point and no ranges left: step to the next string index.
1804
0
        }
1805
0
        return *this;
1806
0
    }
1807
1808
    /**
1809
     * Post-increment: advances via operator++() and returns a copy of the iterator as it was before the move.
1810
     * @draft ICU 77
1811
     */
1812
0
    USetElementIterator operator++(int) {
1813
0
        USetElementIterator result(*this);
1814
0
        operator++();
1815
0
        return result;
1816
0
    }
1817
1818
private:
1819
    friend class USetElements;  // Grants USetElements access to the private constructor below.
1820
1821
    USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
1822
            : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
1823
0
                c(U_SENTINEL), end(U_SENTINEL) {
1824
0
        if (index < rangeCount) {
1825
0
            // Fetch the first range.
1826
0
            operator++();
1827
0
        }
1828
0
        // Otherwise don't move beyond the (index - rangeCount)-th string.
1829
0
    }
1830
1831
    const USet *uset;  // Set whose elements are iterated.
1832
    int32_t index;  // Item index; the totalCount comment below describes how it counts ranges, then strings.
1833
    /** Number of UnicodeSet/USet code point ranges. */
1834
    int32_t rangeCount;
1835
    /**
1836
     * Number of code point ranges plus number of strings.
1837
     * index starts from 0, counts ranges while less than rangeCount,
1838
     * then counts strings while at least rangeCount and less than totalCount.
1839
     *
1840
     * Note that totalCount is the same as uset_getItemCount(), but usually
1841
     * smaller than the number of elements returned by this iterator
1842
     * because we return each code point of each range.
1843
     */
1844
    int32_t totalCount;
1845
    UChar32 c, end;  // Current code point and last code point of the loaded range; both U_SENTINEL when not on a code point.
1846
};
1847
1848
/**
1849
 * A C++ "range" for iterating over all of the elements of a USet.
1850
 * Convenient all-in-one iteration, but creates a std::u16string for each
1851
 * code point or string.
1852
 *
1853
 * Code points are returned first, then empty and multi-character strings.
1854
 *
1855
 * \code
1856
 * using U_HEADER_NESTED_NAMESPACE::USetElements;
1857
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
1858
 * for (auto el : USetElements(uset.getAlias())) {
1859
 *     int32_t len32 = el.length();
1860
 *     char utf8[200];
1861
 *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
1862
 *                        el.data(), len32, 0xFFFD, nullptr, &errorCode);
1863
 *     printf("uset.element length %ld \"%s\"\n", long{len32}, utf8);
1864
 * }
1865
 * \endcode
1866
 *
1867
 * C++ UnicodeSet has member functions for iteration, including begin() and end().
1868
 *
1869
 * begin() and end() return all-elements iterators (USetElementIterator).
1870
 * @draft ICU 77
1871
 * @see USetCodePoints
1872
 * @see USetRanges
1873
 * @see USetStrings
1874
 */
1875
class USetElements {
1876
public:
1877
    /**
1878
     * Constructs a C++ "range" object over all of the elements of the USet; reads uset_getRangeCount() and uset_getStringCount() once, here.
1879
     * @draft ICU 77
1880
     */
1881
    USetElements(const USet *pUset)
1882
        : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
1883
0
            stringCount(uset_getStringCount(pUset)) {}
1884
1885
    /** Copy constructor (defaulted). @draft ICU 77 */
1886
    USetElements(const USetElements &other) = default;
1887
1888
    /** Returns an iterator created at item index 0, with rangeCount + stringCount as its total item count. @draft ICU 77 */
1889
0
    USetElementIterator begin() const {
1890
0
        return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1891
0
    }
1892
1893
    /** Returns the past-the-end iterator, created at item index rangeCount + stringCount. @draft ICU 77 */
1894
0
    USetElementIterator end() const {
1895
0
        return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1896
0
    }
1897
1898
private:
1899
    const USet *uset;  // Set whose elements are iterated.
1900
    int32_t rangeCount, stringCount;  // Results of uset_getRangeCount(uset) and uset_getStringCount(uset), captured by the constructor.
1901
};
1902
1903
}  // namespace U_HEADER_ONLY_NAMESPACE
1904
1905
#endif  // U_HIDE_DRAFT_API
1906
#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1907
1908
#endif  // __USET_H__