Coverage Report

Created: 2026-01-21 08:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/node/deps/icu-small/source/common/unicode/uset.h
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2002-2014, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uset.h
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002mar07
16
*   created by: Markus W. Scherer
17
*
18
*   C version of UnicodeSet.
19
*/
20
21
22
/**
23
 * \file
24
 * \brief C API: Unicode Set
25
 *
26
 * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
27
 */
28
29
#ifndef __USET_H__
30
#define __USET_H__
31
32
#include "unicode/utypes.h"
33
#include "unicode/uchar.h"
34
35
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
36
#include <string>
37
#include <string_view>
38
#include "unicode/char16ptr.h"
39
#include "unicode/localpointer.h"
40
#include "unicode/utf16.h"
41
#endif
42
43
#ifndef USET_DEFINED
44
45
#ifndef U_IN_DOXYGEN
46
#define USET_DEFINED
47
#endif
48
/**
49
 * USet is the C API type corresponding to C++ class UnicodeSet.
50
 * Use the uset_* API to manipulate.  Create with
51
 * uset_open*, and destroy with uset_close.
52
 * @stable ICU 2.4
53
 */
54
typedef struct USet USet;
55
#endif
56
57
/**
58
 * Bitmask values to be passed to uset_openPatternOptions() or
59
 * uset_applyPattern() taking an option parameter.
60
 *
61
 * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
62
 * These case options are mutually exclusive.
63
 *
64
 * Undefined options bits are ignored, and reserved for future use.
65
 *
66
 * @stable ICU 2.4
67
 */
68
enum {
69
    /**
70
     * Ignore white space within patterns unless quoted or escaped.
71
     * @stable ICU 2.4
72
     */
73
    USET_IGNORE_SPACE = 1,
74
75
    /**
76
     * Enable case insensitive matching.  E.g., "[ab]" with this flag
77
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
78
     * match all except 'a', 'A', 'b', and 'B'. This performs a full
79
     * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
80
     *
81
     * The resulting set is a superset of the input for the code points but
82
     * not for the strings.
83
     * It performs a case mapping closure of the code points and adds
84
     * full case folding strings for the code points, and reduces strings of
85
     * the original set to their full case folding equivalents.
86
     *
87
     * This is designed for case-insensitive matches, for example
88
     * in regular expressions. The full code point case closure allows checking of
89
     * an input character directly against the closure set.
90
     * Strings are matched by comparing the case-folded form from the closure
91
     * set with an incremental case folding of the string in question.
92
     *
93
     * The closure set will also contain single code points if the original
94
     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
95
     * This is not necessary (that is, redundant) for the above matching method
96
     * but results in the same closure sets regardless of whether the original
97
     * set contained the code point or a string.
98
     *
99
     * @stable ICU 2.4
100
     */
101
    USET_CASE_INSENSITIVE = 2,
102
103
    /**
104
     * Adds all case mappings for each element in the set.
105
     * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
106
     * of each existing element in the set.
107
     *
108
     * Unlike the “case insensitive” options, this does not perform a closure.
109
     * For example, it does not add 'ſ' (U+017F long s) for 's',
110
     * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
111
     *
112
     * @stable ICU 3.2
113
     */
114
    USET_ADD_CASE_MAPPINGS = 4,
115
116
    /**
117
     * Enable case insensitive matching.
118
     * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
119
     * which map each code point to one code point,
120
     * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
121
     *
122
     * This is designed for case-insensitive matches, for example in certain
123
     * regular expression implementations where only Simple_Case_Folding mappings are used,
124
     * such as in ECMAScript (JavaScript) regular expressions.
125
     *
126
     * @stable ICU 73
127
     */
128
    USET_SIMPLE_CASE_INSENSITIVE = 6
129
};
130
131
/**
132
 * Argument values for whether span() and similar functions continue while
133
 * the current character is contained vs. not contained in the set.
134
 *
135
 * The functionality is straightforward for sets with only single code points,
136
 * without strings (which is the common case):
137
 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
138
 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
139
 * - span() and spanBack() partition any string the same way when
140
 *   alternating between span(USET_SPAN_NOT_CONTAINED) and
141
 *   span(either "contained" condition).
142
 * - Using a complemented (inverted) set and the opposite span conditions
143
 *   yields the same results.
144
 *
145
 * When a set contains multi-code point strings, then these statements may not
146
 * be true, depending on the strings in the set (for example, whether they
147
 * overlap with each other) and the string that is processed.
148
 * For a set with strings:
149
 * - The complement of the set contains the opposite set of code points,
150
 *   but the same set of strings.
151
 *   Therefore, complementing both the set and the span conditions
152
 *   may yield different results.
153
 * - When starting spans at different positions in a string
154
 *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
155
 *   because a set string may start before the later position.
156
 * - span(USET_SPAN_SIMPLE) may be shorter than
157
 *   span(USET_SPAN_CONTAINED) because it will not recursively try
158
 *   all possible paths.
159
 *   For example, with a set which contains the three strings "xy", "xya" and "ax",
160
 *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
161
 *   span("xyax", USET_SPAN_SIMPLE) will return 3.
162
 *   span(USET_SPAN_SIMPLE) will never be longer than
163
 *   span(USET_SPAN_CONTAINED).
164
 * - With either "contained" condition, span() and spanBack() may partition
165
 *   a string in different ways.
166
 *   For example, with a set which contains the two strings "ab" and "ba",
167
 *   and when processing the string "aba",
168
 *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
169
 *   while spanBack() will yield boundaries of { 0, 1, 3 }.
170
 *
171
 * Note: If it is important to get the same boundaries whether iterating forward
172
 * or backward through a string, then either only span() should be used and
173
 * the boundaries cached for backward operation, or an ICU BreakIterator
174
 * could be used.
175
 *
176
 * Note: Unpaired surrogates are treated like surrogate code points.
177
 * Similarly, set strings match only on code point boundaries,
178
 * never in the middle of a surrogate pair.
179
 * Illegal UTF-8 sequences are treated like U+FFFD.
180
 * When processing UTF-8 strings, malformed set strings
181
 * (strings with unpaired surrogates which cannot be converted to UTF-8)
182
 * are ignored.
183
 *
184
 * @stable ICU 3.8
185
 */
186
typedef enum USetSpanCondition {
187
    /**
188
     * Continues a span() while there is no set element at the current position.
189
     * Increments by one code point at a time.
190
     * Stops before the first set element (character or string).
191
     * (For code points only, this is like while contains(current)==false).
192
     *
193
     * When span() returns, the substring between where it started and the position
194
     * it returned consists only of characters that are not in the set,
195
     * and none of its strings overlap with the span.
196
     *
197
     * @stable ICU 3.8
198
     */
199
    USET_SPAN_NOT_CONTAINED = 0,
200
    /**
201
     * Spans the longest substring that is a concatenation of set elements (characters or strings).
202
     * (For characters only, this is like while contains(current)==true).
203
     *
204
     * When span() returns, the substring between where it started and the position
205
     * it returned consists only of set elements (characters or strings) that are in the set.
206
     *
207
     * If a set contains strings, then the span will be the longest substring for which there
208
     * exists at least one non-overlapping concatenation of set elements (characters or strings).
209
     * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
210
     * (Java/ICU/Perl regex stops at the first match of an OR.)
211
     *
212
     * @stable ICU 3.8
213
     */
214
    USET_SPAN_CONTAINED = 1,
215
    /**
216
     * Continues a span() while there is a set element at the current position.
217
     * Increments by the longest matching element at each position.
218
     * (For characters only, this is like while contains(current)==true).
219
     *
220
     * When span() returns, the substring between where it started and the position
221
     * it returned consists only of set elements (characters or strings) that are in the set.
222
     *
223
     * If a set only contains single characters, then this is the same
224
     * as USET_SPAN_CONTAINED.
225
     *
226
     * If a set contains strings, then the span will be the longest substring
227
     * with a match at each position with the longest single set element (character or string).
228
     *
229
     * Use this span condition together with other longest-match algorithms,
230
     * such as ICU converters (ucnv_getUnicodeSet()).
231
     *
232
     * @stable ICU 3.8
233
     */
234
    USET_SPAN_SIMPLE = 2,
235
#ifndef U_HIDE_DEPRECATED_API
236
    /**
237
     * One more than the last span condition.
238
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
239
     */
240
    USET_SPAN_CONDITION_COUNT
241
#endif  // U_HIDE_DEPRECATED_API
242
} USetSpanCondition;
243
244
enum {
245
    /**
246
     * Capacity of USerializedSet::staticArray.
247
     * Enough for any single-code point set.
248
     * Also provides padding for nice sizeof(USerializedSet).
249
     * @stable ICU 2.4
250
     */
251
    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
252
};
253
254
/**
255
 * A serialized form of a Unicode set.  Limited manipulations are
256
 * possible directly on a serialized set.  See below.
257
 * @stable ICU 2.4
258
 */
259
typedef struct USerializedSet {
260
    /**
261
     * The serialized Unicode Set.
262
     * @stable ICU 2.4
263
     */
264
    const uint16_t *array;
265
    /**
266
     * The length of the array that contains BMP characters.
267
     * @stable ICU 2.4
268
     */
269
    int32_t bmpLength;
270
    /**
271
     * The total length of the array.
272
     * @stable ICU 2.4
273
     */
274
    int32_t length;
275
    /**
276
     * A small buffer for the array to reduce memory allocations.
277
     * @stable ICU 2.4
278
     */
279
    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
280
} USerializedSet;
281
282
/*********************************************************************
283
 * USet API
284
 *********************************************************************/
285
286
/**
287
 * Create an empty USet object.
288
 * Equivalent to uset_open(1, 0).
289
 * @return a newly created USet.  The caller must call uset_close() on
290
 * it when done.
291
 * @stable ICU 4.2
292
 */
293
U_CAPI USet* U_EXPORT2
294
uset_openEmpty(void);
295
296
/**
297
 * Creates a USet object that contains the range of characters
298
 * start..end, inclusive.  If <code>start > end</code> 
299
 * then an empty set is created (same as using uset_openEmpty()).
300
 * @param start first character of the range, inclusive
301
 * @param end last character of the range, inclusive
302
 * @return a newly created USet.  The caller must call uset_close() on
303
 * it when done.
304
 * @stable ICU 2.4
305
 */
306
U_CAPI USet* U_EXPORT2
307
uset_open(UChar32 start, UChar32 end);
308
309
/**
310
 * Creates a set from the given pattern.  See the UnicodeSet class
311
 * description for the syntax of the pattern language.
312
 * @param pattern a string specifying what characters are in the set
313
 * @param patternLength the length of the pattern, or -1 if null
314
 * terminated
315
 * @param ec the error code
316
 * @stable ICU 2.4
317
 */
318
U_CAPI USet* U_EXPORT2
319
uset_openPattern(const UChar* pattern, int32_t patternLength,
320
                 UErrorCode* ec);
321
322
/**
323
 * Creates a set from the given pattern.  See the UnicodeSet class
324
 * description for the syntax of the pattern language.
325
 * @param pattern a string specifying what characters are in the set
326
 * @param patternLength the length of the pattern, or -1 if null
327
 * terminated
328
 * @param options bitmask for options to apply to the pattern.
329
 * Valid options are USET_IGNORE_SPACE and
330
 * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
331
 * These case options are mutually exclusive.
332
 * @param ec the error code
333
 * @stable ICU 2.4
334
 */
335
U_CAPI USet* U_EXPORT2
336
uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
337
                 uint32_t options,
338
                 UErrorCode* ec);
339
340
/**
341
 * Disposes of the storage used by a USet object.  This function should
342
 * be called exactly once for objects returned by uset_open().
343
 * @param set the object to dispose of
344
 * @stable ICU 2.4
345
 */
346
U_CAPI void U_EXPORT2
347
uset_close(USet* set);
348
349
#if U_SHOW_CPLUSPLUS_API
350
351
U_NAMESPACE_BEGIN
352
353
/**
354
 * \class LocalUSetPointer
355
 * "Smart pointer" class, closes a USet via uset_close().
356
 * For most methods see the LocalPointerBase base class.
357
 *
358
 * @see LocalPointerBase
359
 * @see LocalPointer
360
 * @stable ICU 4.4
361
 */
362
U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
363
364
U_NAMESPACE_END
365
366
#endif
367
368
/**
369
 * Returns a copy of this object.
370
 * If this set is frozen, then the clone will be frozen as well.
371
 * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
372
 * @param set the original set
373
 * @return the newly allocated copy of the set
374
 * @see uset_cloneAsThawed
375
 * @stable ICU 3.8
376
 */
377
U_CAPI USet * U_EXPORT2
378
uset_clone(const USet *set);
379
380
/**
381
 * Determines whether the set has been frozen (made immutable) or not.
382
 * See the ICU4J Freezable interface for details.
383
 * @param set the set
384
 * @return true/false for whether the set has been frozen
385
 * @see uset_freeze
386
 * @see uset_cloneAsThawed
387
 * @stable ICU 3.8
388
 */
389
U_CAPI UBool U_EXPORT2
390
uset_isFrozen(const USet *set);
391
392
/**
393
 * Freeze the set (make it immutable).
394
 * Once frozen, it cannot be unfrozen and is therefore thread-safe
395
 * until it is deleted.
396
 * See the ICU4J Freezable interface for details.
397
 * Freezing the set may also make some operations faster, for example
398
 * uset_contains() and uset_span().
399
 * A frozen set will not be modified. (It remains frozen.)
400
 * @param set the set
401
 * The set is frozen in place; this function returns nothing.
402
 * @see uset_isFrozen
403
 * @see uset_cloneAsThawed
404
 * @stable ICU 3.8
405
 */
406
U_CAPI void U_EXPORT2
407
uset_freeze(USet *set);
408
409
/**
410
 * Clone the set and make the clone mutable.
411
 * See the ICU4J Freezable interface for details.
412
 * @param set the set
413
 * @return the mutable clone
414
 * @see uset_freeze
415
 * @see uset_isFrozen
416
 * @see uset_clone
417
 * @stable ICU 3.8
418
 */
419
U_CAPI USet * U_EXPORT2
420
uset_cloneAsThawed(const USet *set);
421
422
/**
423
 * Causes the USet object to represent the range <code>start - end</code>.
424
 * If <code>start > end</code> then this USet is set to an empty range.
425
 * A frozen set will not be modified.
426
 * @param set the object to set to the given range
427
 * @param start first character in the set, inclusive
428
 * @param end last character in the set, inclusive
429
 * @stable ICU 3.2
430
 */
431
U_CAPI void U_EXPORT2
432
uset_set(USet* set,
433
         UChar32 start, UChar32 end);
434
435
/**
436
 * Modifies the set to represent the set specified by the given
437
 * pattern. See the UnicodeSet class description for the syntax of 
438
 * the pattern language. See also the User Guide chapter about UnicodeSet.
439
 * <em>Empties the set passed before applying the pattern.</em>
440
 * A frozen set will not be modified.
441
 * @param set               The set to which the pattern is to be applied. 
442
 * @param pattern           A pointer to UChar string specifying what characters are in the set.
443
 *                          The character at pattern[0] must be a '['.
444
 * @param patternLength     The length of the UChar string. -1 if NUL terminated.
445
 * @param options           A bitmask for options to apply to the pattern.
446
 *                          Valid options are USET_IGNORE_SPACE and
447
 *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
448
 *                          USET_SIMPLE_CASE_INSENSITIVE.
449
 *                          These case options are mutually exclusive.
450
 * @param status            Returns an error if the pattern cannot be parsed.
451
 * @return                  Upon successful parse, the value is
452
 *                          the index of the character after the closing ']' 
453
 *                          of the parsed pattern.
454
 *                          If the status code indicates failure, then the return value 
455
 *                          is the index of the error in the source.
456
 *
457
 * @stable ICU 2.8
458
 */
459
U_CAPI int32_t U_EXPORT2 
460
uset_applyPattern(USet *set,
461
                  const UChar *pattern, int32_t patternLength,
462
                  uint32_t options,
463
                  UErrorCode *status);
464
465
/**
466
 * Modifies the set to contain those code points which have the given value
467
 * for the given binary or enumerated property, as returned by
468
 * u_getIntPropertyValue.  Prior contents of this set are lost.
469
 * A frozen set will not be modified.
470
 *
471
 * @param set the object to contain the code points defined by the property
472
 *
473
 * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
474
 * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
475
 * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
476
 *
477
 * @param value a value in the range u_getIntPropertyMinValue(prop)..
478
 * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
479
 * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
480
 * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
481
 * categories such as [:L:] to be represented.
482
 *
483
 * @param ec error code input/output parameter
484
 *
485
 * @stable ICU 3.2
486
 */
487
U_CAPI void U_EXPORT2
488
uset_applyIntPropertyValue(USet* set,
489
                           UProperty prop, int32_t value, UErrorCode* ec);
490
491
/**
492
 * Modifies the set to contain those code points which have the
493
 * given value for the given property.  Prior contents of this
494
 * set are lost.
495
 * A frozen set will not be modified.
496
 *
497
 * @param set the object to contain the code points defined by the given
498
 * property and value alias
499
 *
500
 * @param prop a string specifying a property alias, either short or long.
501
 * The name is matched loosely.  See PropertyAliases.txt for names and a
502
 * description of loose matching.  If the value string is empty, then this
503
 * string is interpreted as either a General_Category value alias, a Script
504
 * value alias, a binary property alias, or a special ID.  Special IDs are
505
 * matched loosely and correspond to the following sets:
506
 *
507
 * "ANY" = [\\u0000-\\U0010FFFF],
508
 * "ASCII" = [\\u0000-\\u007F],
509
 * "Assigned" = [:^Cn:].
510
 *
511
 * @param propLength the length of the prop, or -1 if NUL-terminated
512
 *
513
 * @param value a string specifying a value alias, either short or long.
514
 * The name is matched loosely.  See PropertyValueAliases.txt for names
515
 * and a description of loose matching.  In addition to aliases listed,
516
 * numeric values and canonical combining classes may be expressed
517
 * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
518
 * may also be empty.
519
 *
520
 * @param valueLength the length of the value, or -1 if NUL-terminated
521
 *
522
 * @param ec error code input/output parameter
523
 *
524
 * @stable ICU 3.2
525
 */
526
U_CAPI void U_EXPORT2
527
uset_applyPropertyAlias(USet* set,
528
                        const UChar *prop, int32_t propLength,
529
                        const UChar *value, int32_t valueLength,
530
                        UErrorCode* ec);
531
532
/**
533
 * Return true if the given position, in the given pattern, appears
534
 * to be the start of a UnicodeSet pattern.
535
 *
536
 * @param pattern a string specifying the pattern
537
 * @param patternLength the length of the pattern, or -1 if NUL-terminated
538
 * @param pos the given position
539
 * @stable ICU 3.2
540
 */
541
U_CAPI UBool U_EXPORT2
542
uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
543
                      int32_t pos);
544
545
/**
546
 * Returns a string representation of this set.  If the result of
547
 * calling this function is passed to a uset_openPattern(), it
548
 * will produce another set that is equal to this one.
549
 * @param set the set
550
 * @param result the string to receive the rules, may be NULL
551
 * @param resultCapacity the capacity of result, may be 0 if result is NULL
552
 * @param escapeUnprintable if true then convert unprintable
553
 * characters to their hex escape representations, \\uxxxx or
554
 * \\Uxxxxxxxx.  Unprintable characters are those other than
555
 * U+000A, U+0020..U+007E.
556
 * @param ec error code.
557
 * @return length of string, possibly larger than resultCapacity
558
 * @stable ICU 2.4
559
 */
560
U_CAPI int32_t U_EXPORT2
561
uset_toPattern(const USet* set,
562
               UChar* result, int32_t resultCapacity,
563
               UBool escapeUnprintable,
564
               UErrorCode* ec);
565
566
/**
567
 * Adds the given character to the given USet.  After this call,
568
 * uset_contains(set, c) will return true.
569
 * A frozen set will not be modified.
570
 * @param set the object to which to add the character
571
 * @param c the character to add
572
 * @stable ICU 2.4
573
 */
574
U_CAPI void U_EXPORT2
575
uset_add(USet* set, UChar32 c);
576
577
/**
578
 * Adds all of the elements in the specified set to this set if
579
 * they're not already present.  This operation effectively
580
 * modifies this set so that its value is the <i>union</i> of the two
581
 * sets.  The behavior of this operation is unspecified if the specified
582
 * collection is modified while the operation is in progress.
583
 * A frozen set will not be modified.
584
 *
585
 * @param set the object to which to add the set
586
 * @param additionalSet the source set whose elements are to be added to this set.
587
 * @stable ICU 2.6
588
 */
589
U_CAPI void U_EXPORT2
590
uset_addAll(USet* set, const USet *additionalSet);
591
592
/**
593
 * Adds the given range of characters to the given USet.  After this call,
594
 * uset_containsRange(set, start, end) will return true.
595
 * A frozen set will not be modified.
596
 * @param set the object to which to add the range
597
 * @param start the first character of the range to add, inclusive
598
 * @param end the last character of the range to add, inclusive
599
 * @stable ICU 2.2
600
 */
601
U_CAPI void U_EXPORT2
602
uset_addRange(USet* set, UChar32 start, UChar32 end);
603
604
/**
605
 * Adds the given string to the given USet.  After this call,
606
 * uset_containsString(set, str, strLen) will return true.
607
 * A frozen set will not be modified.
608
 * @param set the object to which to add the string
609
 * @param str the string to add
610
 * @param strLen the length of the string or -1 if null terminated.
611
 * @stable ICU 2.4
612
 */
613
U_CAPI void U_EXPORT2
614
uset_addString(USet* set, const UChar* str, int32_t strLen);
615
616
/**
617
 * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
618
 * If this set already contains any particular character, it has no effect on that character.
619
 * A frozen set will not be modified.
620
 * @param set the object to which to add the characters
621
 * @param str the source string
622
 * @param strLen the length of the string or -1 if null terminated.
623
 * @stable ICU 3.4
624
 */
625
U_CAPI void U_EXPORT2
626
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
627
628
/**
629
 * Removes the given character from the given USet.  After this call,
630
 * uset_contains(set, c) will return false.
631
 * A frozen set will not be modified.
632
 * @param set the object from which to remove the character
633
 * @param c the character to remove
634
 * @stable ICU 2.4
635
 */
636
U_CAPI void U_EXPORT2
637
uset_remove(USet* set, UChar32 c);
638
639
/**
640
 * Removes the given range of characters from the given USet.  After this call,
641
 * uset_contains(set, c) will return false for every c in start..end.
642
 * A frozen set will not be modified.
643
 * @param set the object from which to remove the range
644
 * @param start the first character of the range to remove, inclusive
645
 * @param end the last character of the range to remove, inclusive
646
 * @stable ICU 2.2
647
 */
648
U_CAPI void U_EXPORT2
649
uset_removeRange(USet* set, UChar32 start, UChar32 end);
650
651
/**
652
 * Removes the given string from the given USet.  After this call,
653
 * uset_containsString(set, str, strLen) will return false.
654
 * A frozen set will not be modified.
655
 * @param set the object from which to remove the string
656
 * @param str the string to remove
657
 * @param strLen the length of the string or -1 if null terminated.
658
 * @stable ICU 2.4
659
 */
660
U_CAPI void U_EXPORT2
661
uset_removeString(USet* set, const UChar* str, int32_t strLen);
662
663
/**
664
 * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
665
 * A frozen set will not be modified.
666
 *
667
 * @param set the object to be modified
668
 * @param str the string
669
 * @param length the length of the string, or -1 if NUL-terminated
670
 * @stable ICU 69
671
 */
672
U_CAPI void U_EXPORT2
673
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
674
675
/**
676
 * Removes from this set all of its elements that are contained in the
677
 * specified set.  This operation effectively modifies this
678
 * set so that its value is the <i>asymmetric set difference</i> of
679
 * the two sets.
680
 * A frozen set will not be modified.
681
 * @param set the object from which the elements are to be removed
682
 * @param removeSet the object that defines which elements will be
683
 * removed from this set
684
 * @stable ICU 3.2
685
 */
686
U_CAPI void U_EXPORT2
687
uset_removeAll(USet* set, const USet* removeSet);
688
689
/**
690
 * Retain only the elements in this set that are contained in the
691
 * specified range.  If <code>start > end</code> then an empty range is
692
 * retained, leaving the set empty.  This is equivalent to
693
 * a boolean logic AND, or a set INTERSECTION.
694
 * A frozen set will not be modified.
695
 *
696
 * @param set the object for which to retain only the specified range
697
 * @param start first character, inclusive, of range
698
 * @param end last character, inclusive, of range
699
 * @stable ICU 3.2
700
 */
701
U_CAPI void U_EXPORT2
702
uset_retain(USet* set, UChar32 start, UChar32 end);
703
704
/**
705
 * Retains only the specified string from this set if it is present.
706
 * Upon return this set will be empty if it did not contain s, or
707
 * will only contain s if it did contain s.
708
 * A frozen set will not be modified.
709
 *
710
 * @param set the object to be modified
711
 * @param str the string
712
 * @param length the length of the string, or -1 if NUL-terminated
713
 * @stable ICU 69
714
 */
715
U_CAPI void U_EXPORT2
716
uset_retainString(USet *set, const UChar *str, int32_t length);
717
718
/**
719
 * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
720
 * A frozen set will not be modified.
721
 *
722
 * @param set the object to be modified
723
 * @param str the string
724
 * @param length the length of the string, or -1 if NUL-terminated
725
 * @stable ICU 69
726
 */
727
U_CAPI void U_EXPORT2
728
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
729
730
/**
731
 * Retains only the elements in this set that are contained in the
732
 * specified set.  In other words, removes from this set all of
733
 * its elements that are not contained in the specified set.  This
734
 * operation effectively modifies this set so that its value is
735
 * the <i>intersection</i> of the two sets.
736
 * A frozen set will not be modified.
737
 *
738
 * @param set the object on which to perform the retain
739
 * @param retain set that defines which elements this set will retain
740
 * @stable ICU 3.2
741
 */
742
U_CAPI void U_EXPORT2
743
uset_retainAll(USet* set, const USet* retain);
744
745
/**
746
 * Reallocate this object's internal structures to take up the least
747
 * possible space, without changing this object's value.
748
 * A frozen set will not be modified.
749
 *
750
 * @param set the object on which to perform the compact
751
 * @stable ICU 3.2
752
 */
753
U_CAPI void U_EXPORT2
754
uset_compact(USet* set);
755
756
/**
757
 * This is equivalent to
758
 * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
759
 *
760
 * <strong>Note:</strong> This performs a symmetric difference with all code points
761
 * <em>and thus retains all multicharacter strings</em>.
762
 * In order to achieve a “code point complement” (all code points minus this set),
763
 * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
764
 *
765
 * A frozen set will not be modified.
766
 * @param set the set
767
 * @stable ICU 2.4
768
 */
769
U_CAPI void U_EXPORT2
770
uset_complement(USet* set);
771
772
/**
773
 * Complements the specified range in this set.  Any character in
774
 * the range will be removed if it is in this set, or will be
775
 * added if it is not in this set.  If <code>start > end</code>
776
 * then an empty range is complemented, leaving the set unchanged.
777
 * This is equivalent to a boolean logic XOR.
778
 * A frozen set will not be modified.
779
 *
780
 * @param set the object to be modified
781
 * @param start first character, inclusive, of range
782
 * @param end last character, inclusive, of range
783
 * @stable ICU 69
784
 */
785
U_CAPI void U_EXPORT2
786
uset_complementRange(USet *set, UChar32 start, UChar32 end);
787
788
/**
789
 * Complements the specified string in this set.
790
 * The string will be removed if it is in this set, or will be added if it is not in this set.
791
 * A frozen set will not be modified.
792
 *
793
 * @param set the object to be modified
794
 * @param str the string
795
 * @param length the length of the string, or -1 if NUL-terminated
796
 * @stable ICU 69
797
 */
798
U_CAPI void U_EXPORT2
799
uset_complementString(USet *set, const UChar *str, int32_t length);
800
801
/**
802
 * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
803
 * A frozen set will not be modified.
804
 *
805
 * @param set the object to be modified
806
 * @param str the string
807
 * @param length the length of the string, or -1 if NUL-terminated
808
 * @stable ICU 69
809
 */
810
U_CAPI void U_EXPORT2
811
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
812
813
/**
814
 * Complements in this set all elements contained in the specified
815
 * set.  Any character in the other set will be removed if it is
816
 * in this set, or will be added if it is not in this set.
817
 * A frozen set will not be modified.
818
 *
819
 * @param set the set with which to complement
820
 * @param complement set that defines which elements will be xor'ed
821
 * with this set.
822
 * @stable ICU 3.2
823
 */
824
U_CAPI void U_EXPORT2
825
uset_complementAll(USet* set, const USet* complement);
826
827
/**
828
 * Removes all of the elements from this set.  This set will be
829
 * empty after this call returns.
830
 * A frozen set will not be modified.
831
 * @param set the set
832
 * @stable ICU 2.4
833
 */
834
U_CAPI void U_EXPORT2
835
uset_clear(USet* set);
836
837
/**
838
 * Close this set over the given attribute.  For the attribute
839
 * USET_CASE_INSENSITIVE, the result is to modify this set so that:
840
 *
841
 * 1. For each character or string 'a' in this set, all strings or
842
 * characters 'b' such that foldCase(a) == foldCase(b) are added
843
 * to this set.
844
 *
845
 * 2. For each string 'e' in the resulting set, if e !=
846
 * foldCase(e), 'e' will be removed.
847
 *
848
 * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
849
 *
850
 * (Here foldCase(x) refers to the operation u_strFoldCase, and a
851
 * == b denotes that the contents are the same, not pointer
852
 * comparison.)
853
 *
854
 * A frozen set will not be modified.
855
 *
856
 * @param set the set
857
 *
858
 * @param attributes bitmask for attributes to close over.
859
 * Valid options:
860
 * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
861
 * These case options are mutually exclusive.
862
 * Unrelated options bits are ignored.
863
 * @stable ICU 4.2
864
 */
865
U_CAPI void U_EXPORT2
866
uset_closeOver(USet* set, int32_t attributes);
867
868
/**
869
 * Remove all strings from this set.
870
 *
871
 * @param set the set
872
 * @stable ICU 4.2
873
 */
874
U_CAPI void U_EXPORT2
875
uset_removeAllStrings(USet* set);
876
877
/**
878
 * Returns true if the given USet contains no characters and no
879
 * strings.
880
 * @param set the set
881
 * @return true if set is empty
882
 * @stable ICU 2.4
883
 */
884
U_CAPI UBool U_EXPORT2
885
uset_isEmpty(const USet* set);
886
887
/**
888
 * @param set the set
889
 * @return true if this set contains multi-character strings or the empty string.
890
 * @stable ICU 70
891
 */
892
U_CAPI UBool U_EXPORT2
893
uset_hasStrings(const USet *set);
894
895
/**
896
 * Returns true if the given USet contains the given character.
897
 * This function works faster with a frozen set.
898
 * @param set the set
899
 * @param c The codepoint to check for within the set
900
 * @return true if set contains c
901
 * @stable ICU 2.4
902
 */
903
U_CAPI UBool U_EXPORT2
904
uset_contains(const USet* set, UChar32 c);
905
906
/**
907
 * Returns true if the given USet contains all characters c
908
 * where start <= c && c <= end.
909
 * @param set the set
910
 * @param start the first character of the range to test, inclusive
911
 * @param end the last character of the range to test, inclusive
912
 * @return true if set contains the range
913
 * @stable ICU 2.2
914
 */
915
U_CAPI UBool U_EXPORT2
916
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
917
918
/**
919
 * Returns true if the given USet contains the given string.
920
 * @param set the set
921
 * @param str the string
922
 * @param strLen the length of the string or -1 if null terminated.
923
 * @return true if set contains str
924
 * @stable ICU 2.4
925
 */
926
U_CAPI UBool U_EXPORT2
927
uset_containsString(const USet* set, const UChar* str, int32_t strLen);
928
929
/**
930
 * Returns the index of the given character within this set, where
931
 * the set is ordered by ascending code point.  If the character
932
 * is not in this set, return -1.  The inverse of this method is
933
 * <code>charAt()</code>.
934
 * @param set the set
935
 * @param c the character to obtain the index for
936
 * @return an index from 0..size()-1, or -1
937
 * @stable ICU 3.2
938
 */
939
U_CAPI int32_t U_EXPORT2
940
uset_indexOf(const USet* set, UChar32 c);
941
942
/**
943
 * Returns the character at the given index within this set, where
944
 * the set is ordered by ascending code point.  If the index is
945
 * out of range for characters, returns (UChar32)-1.
946
 * The inverse of this method is <code>indexOf()</code>.
947
 *
948
 * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
949
 * with uset_getItem(), because for each call it skips linearly over <code>index</code>
950
 * characters in the ranges.
951
 *
952
 * @param set the set
953
 * @param charIndex an index from 0..size()-1 to obtain the char for
954
 * @return the character at the given index, or (UChar32)-1.
955
 * @stable ICU 3.2
956
 */
957
U_CAPI UChar32 U_EXPORT2
958
uset_charAt(const USet* set, int32_t charIndex);
959
960
/**
961
 * Returns the number of characters and strings contained in this set.
962
 * The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings.
963
 *
964
 * This is slower than uset_getRangeCount() and uset_getItemCount() because
965
 * it counts the code points of all ranges.
966
 *
967
 * @param set the set
968
 * @return a non-negative integer counting the characters and strings
969
 * contained in set
970
 * @stable ICU 2.4
971
 * @see uset_getRangeCount
972
 * @see uset_getStringCount
973
 * @see uset_getItemCount
974
 */
975
U_CAPI int32_t U_EXPORT2
976
uset_size(const USet* set);
977
978
/**
979
 * @param set the set
980
 * @return the number of ranges in this set.
981
 * @stable ICU 70
982
 * @see uset_getItemCount
983
 * @see uset_getItem
984
 * @see uset_getStringCount
985
 * @see uset_size
986
 */
987
U_CAPI int32_t U_EXPORT2
988
uset_getRangeCount(const USet *set);
989
990
/**
991
 * @param set the set
992
 * @return the number of strings in this set.
993
 * @stable ICU 76
994
 * @see uset_getRangeCount
995
 * @see uset_getItemCount
996
 * @see uset_size
997
 */
998
U_CAPI int32_t U_EXPORT2
999
uset_getStringCount(const USet *set);
1000
1001
/**
1002
 * Returns the index-th string (empty or multi-character) in the set.
1003
 * The string may not be NUL-terminated.
1004
 * The output length must be used, and the caller must not read more than that many UChars.
1005
 *
1006
 * @param set the set
1007
 * @param index the string index, 0 .. uset_getStringCount() - 1
1008
 * @param pLength the output string length; must not be NULL
1009
 * @return the pointer to the string; NULL if the index is out of range or pLength is NULL
1010
 * @stable ICU 76
1011
 * @see uset_getStringCount
1012
 */
1013
U_CAPI const UChar* U_EXPORT2
1014
uset_getString(const USet *set, int32_t index, int32_t *pLength);
1015
1016
/**
1017
 * Returns the number of items in this set.  An item is either a range
1018
 * of characters or a single multicharacter string.
1019
 * @param set the set
1020
 * @return a non-negative integer counting the character ranges
1021
 * and/or strings contained in set
1022
 * @stable ICU 2.4
1023
 * @see uset_getRangeCount
1024
 * @see uset_getStringCount
1025
 */
1026
U_CAPI int32_t U_EXPORT2
1027
uset_getItemCount(const USet* set);
1028
1029
/**
1030
 * Returns an item of this set.  An item is either a range of
1031
 * characters or a single multicharacter string (which can be the empty string).
1032
 *
1033
 * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
1034
 * and the range is <code>*start</code>..<code>*end</code>.
1035
 *
1036
 * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
1037
 * this function copies the string into <code>str[strCapacity]</code> and
1038
 * returns the length of the string (0 for the empty string).
1039
 * See uset_getString() for a function that does not copy the string contents.
1040
 *
1041
 * If <code>itemIndex</code> is out of range, then this function returns -1.
1042
 *
1043
 * Note that 0 is returned for each range as well as for the empty string.
1044
 *
1045
 * @param set the set
1046
 * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
1047
 * @param start pointer to variable to receive first character in range, inclusive;
1048
 *              can be NULL for a string item
1049
 * @param end pointer to variable to receive last character in range, inclusive;
1050
 *            can be NULL for a string item
1051
 * @param str buffer to receive the string, may be NULL
1052
 * @param strCapacity capacity of str, or 0 if str is NULL
1053
 * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
1054
 * @return the length of the string (0 or >= 2), or 0 if the item is a range,
1055
 *         or -1 if the itemIndex is out of range
1056
 * @stable ICU 2.4
1057
 * @see uset_getString
1058
 */
1059
U_CAPI int32_t U_EXPORT2
1060
uset_getItem(const USet* set, int32_t itemIndex,
1061
             UChar32* start, UChar32* end,
1062
             UChar* str, int32_t strCapacity,
1063
             UErrorCode* ec);
1064
1065
/**
1066
 * Returns true if set1 contains all the characters and strings
1067
 * of set2. It answers the question, 'Is set1 a superset of set2?'
1068
 * @param set1 set to be checked for containment
1069
 * @param set2 set to be checked for containment
1070
 * @return true if the test condition is met
1071
 * @stable ICU 3.2
1072
 */
1073
U_CAPI UBool U_EXPORT2
1074
uset_containsAll(const USet* set1, const USet* set2);
1075
1076
/**
1077
 * Returns true if this set contains all the characters
1078
 * of the given string. This does not check containment of grapheme
1079
 * clusters, like uset_containsString.
1080
 * @param set set of characters to be checked for containment
1081
 * @param str string containing codepoints to be checked for containment
1082
 * @param strLen the length of the string or -1 if null terminated.
1083
 * @return true if the test condition is met
1084
 * @stable ICU 3.4
1085
 */
1086
U_CAPI UBool U_EXPORT2
1087
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1088
1089
/**
1090
 * Returns true if set1 contains none of the characters and strings
1091
 * of set2. It answers the question, 'Is set1 disjoint from set2?'
1092
 * @param set1 set to be checked for containment
1093
 * @param set2 set to be checked for containment
1094
 * @return true if the test condition is met
1095
 * @stable ICU 3.2
1096
 */
1097
U_CAPI UBool U_EXPORT2
1098
uset_containsNone(const USet* set1, const USet* set2);
1099
1100
/**
1101
 * Returns true if set1 contains some of the characters and strings
1102
 * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
1103
 * @param set1 set to be checked for containment
1104
 * @param set2 set to be checked for containment
1105
 * @return true if the test condition is met
1106
 * @stable ICU 3.2
1107
 */
1108
U_CAPI UBool U_EXPORT2
1109
uset_containsSome(const USet* set1, const USet* set2);
1110
1111
/**
1112
 * Returns the length of the initial substring of the input string which
1113
 * consists only of characters and strings that are contained in this set
1114
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1115
 * or only of characters and strings that are not contained
1116
 * in this set (USET_SPAN_NOT_CONTAINED).
1117
 * See USetSpanCondition for details.
1118
 * Similar to the strspn() C library function.
1119
 * Unpaired surrogates are treated according to contains() of their surrogate code points.
1120
 * This function works faster with a frozen set and with a non-negative string length argument.
1121
 * @param set the set
1122
 * @param s start of the string
1123
 * @param length of the string; can be -1 for NUL-terminated
1124
 * @param spanCondition specifies the containment condition
1125
 * @return the length of the initial substring according to the spanCondition;
1126
 *         0 if the start of the string does not fit the spanCondition
1127
 * @stable ICU 3.8
1128
 * @see USetSpanCondition
1129
 */
1130
U_CAPI int32_t U_EXPORT2
1131
uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1132
1133
/**
1134
 * Returns the start of the trailing substring of the input string which
1135
 * consists only of characters and strings that are contained in this set
1136
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1137
 * or only of characters and strings that are not contained
1138
 * in this set (USET_SPAN_NOT_CONTAINED).
1139
 * See USetSpanCondition for details.
1140
 * Unpaired surrogates are treated according to contains() of their surrogate code points.
1141
 * This function works faster with a frozen set and with a non-negative string length argument.
1142
 * @param set the set
1143
 * @param s start of the string
1144
 * @param length of the string; can be -1 for NUL-terminated
1145
 * @param spanCondition specifies the containment condition
1146
 * @return the start of the trailing substring according to the spanCondition;
1147
 *         the string length if the end of the string does not fit the spanCondition
1148
 * @stable ICU 3.8
1149
 * @see USetSpanCondition
1150
 */
1151
U_CAPI int32_t U_EXPORT2
1152
uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1153
1154
/**
1155
 * Returns the length of the initial substring of the input string which
1156
 * consists only of characters and strings that are contained in this set
1157
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1158
 * or only of characters and strings that are not contained
1159
 * in this set (USET_SPAN_NOT_CONTAINED).
1160
 * See USetSpanCondition for details.
1161
 * Similar to the strspn() C library function.
1162
 * Malformed byte sequences are treated according to contains(0xfffd).
1163
 * This function works faster with a frozen set and with a non-negative string length argument.
1164
 * @param set the set
1165
 * @param s start of the string (UTF-8)
1166
 * @param length of the string; can be -1 for NUL-terminated
1167
 * @param spanCondition specifies the containment condition
1168
 * @return the length of the initial substring according to the spanCondition;
1169
 *         0 if the start of the string does not fit the spanCondition
1170
 * @stable ICU 3.8
1171
 * @see USetSpanCondition
1172
 */
1173
U_CAPI int32_t U_EXPORT2
1174
uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1175
1176
/**
1177
 * Returns the start of the trailing substring of the input string which
1178
 * consists only of characters and strings that are contained in this set
1179
 * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1180
 * or only of characters and strings that are not contained
1181
 * in this set (USET_SPAN_NOT_CONTAINED).
1182
 * See USetSpanCondition for details.
1183
 * Malformed byte sequences are treated according to contains(0xfffd).
1184
 * This function works faster with a frozen set and with a non-negative string length argument.
1185
 * @param set the set
1186
 * @param s start of the string (UTF-8)
1187
 * @param length of the string; can be -1 for NUL-terminated
1188
 * @param spanCondition specifies the containment condition
1189
 * @return the start of the trailing substring according to the spanCondition;
1190
 *         the string length if the end of the string does not fit the spanCondition
1191
 * @stable ICU 3.8
1192
 * @see USetSpanCondition
1193
 */
1194
U_CAPI int32_t U_EXPORT2
1195
uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1196
1197
/**
1198
 * Returns true if set1 contains all of the characters and strings
1199
 * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
1200
 * @param set1 set to be checked for containment
1201
 * @param set2 set to be checked for containment
1202
 * @return true if the test condition is met
1203
 * @stable ICU 3.2
1204
 */
1205
U_CAPI UBool U_EXPORT2
1206
uset_equals(const USet* set1, const USet* set2);
1207
1208
/*********************************************************************
1209
 * Serialized set API
1210
 *********************************************************************/
1211
1212
/**
1213
 * Serializes this set into an array of 16-bit integers.  Serialization
1214
 * (currently) only records the characters in the set; multicharacter
1215
 * strings are ignored.
1216
 *
1217
 * The array
1218
 * has the following format (each line is one 16-bit integer):
1219
 *
1220
 *  length     = (n+2*m) | (m!=0?0x8000:0)
1221
 *  bmpLength  = n; present if m!=0
1222
 *  bmp[0]
1223
 *  bmp[1]
1224
 *  ...
1225
 *  bmp[n-1]
1226
 *  supp-high[0]
1227
 *  supp-low[0]
1228
 *  supp-high[1]
1229
 *  supp-low[1]
1230
 *  ...
1231
 *  supp-high[m-1]
1232
 *  supp-low[m-1]
1233
 *
1234
 * The array starts with a header.  After the header are n bmp
1235
 * code points, then m supplementary code points.  Either n or m
1236
 * or both may be zero.  n+2*m is always <= 0x7FFF.
1237
 *
1238
 * If there are no supplementary characters (if m==0) then the
1239
 * header is one 16-bit integer, 'length', with value n.
1240
 *
1241
 * If there are supplementary characters (if m!=0) then the header
1242
 * is two 16-bit integers.  The first, 'length', has value
1243
 * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
1244
 *
1245
 * After the header the code points are stored in ascending order.
1246
 * Supplementary code points are stored as most significant 16
1247
 * bits followed by least significant 16 bits.
1248
 *
1249
 * @param set the set
1250
 * @param dest pointer to buffer of destCapacity 16-bit integers.
1251
 * May be NULL only if destCapacity is zero.
1252
 * @param destCapacity size of dest, or zero.  Must not be negative.
1253
 * @param pErrorCode pointer to the error code.  Will be set to
1254
 * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
1255
 * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
1256
 * @return the total length of the serialized format, including
1257
 * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
1258
 * than U_BUFFER_OVERFLOW_ERROR.
1259
 * @stable ICU 2.4
1260
 */
1261
U_CAPI int32_t U_EXPORT2
1262
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1263
1264
/**
1265
 * Given a serialized array, fill in the given serialized set object.
1266
 * @param fillSet pointer to result
1267
 * @param src pointer to start of array
1268
 * @param srcLength length of array
1269
 * @return true if the given array is valid, otherwise false
1270
 * @stable ICU 2.4
1271
 */
1272
U_CAPI UBool U_EXPORT2
1273
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1274
1275
/**
1276
 * Set the USerializedSet to contain the given character (and nothing
1277
 * else).
1278
 * @param fillSet pointer to result
1279
 * @param c The codepoint to set
1280
 * @stable ICU 2.4
1281
 */
1282
U_CAPI void U_EXPORT2
1283
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1284
1285
/**
1286
 * Returns true if the given USerializedSet contains the given
1287
 * character.
1288
 * @param set the serialized set
1289
 * @param c The codepoint to check for within the set
1290
 * @return true if set contains c
1291
 * @stable ICU 2.4
1292
 */
1293
U_CAPI UBool U_EXPORT2
1294
uset_serializedContains(const USerializedSet* set, UChar32 c);
1295
1296
/**
1297
 * Returns the number of disjoint ranges of characters contained in
1298
 * the given serialized set.  Ignores any strings contained in the
1299
 * set.
1300
 * @param set the serialized set
1301
 * @return a non-negative integer counting the character ranges
1302
 * contained in set
1303
 * @stable ICU 2.4
1304
 */
1305
U_CAPI int32_t U_EXPORT2
1306
uset_getSerializedRangeCount(const USerializedSet* set);
1307
1308
/**
1309
 * Returns a range of characters contained in the given serialized
1310
 * set.
1311
 * @param set the serialized set
1312
 * @param rangeIndex a non-negative integer in the range 0..
1313
 * uset_getSerializedRangeCount(set)-1
1314
 * @param pStart pointer to variable to receive first character
1315
 * in range, inclusive
1316
 * @param pEnd pointer to variable to receive last character in range,
1317
 * inclusive
1318
 * @return true if rangeIndex is valid, otherwise false
1319
 * @stable ICU 2.4
1320
 */
1321
U_CAPI UBool U_EXPORT2
1322
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1323
                        UChar32* pStart, UChar32* pEnd);
1324
1325
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1326
1327
namespace U_HEADER_ONLY_NAMESPACE {
1328
1329
// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1330
// not intended to be used via export from the ICU DLL.
1331
1332
/**
1333
 * Iterator returned by USetCodePoints.
1334
 * @stable ICU 76
1335
 */
1336
class USetCodePointIterator {
1337
public:
1338
    /** @stable ICU 76 */
1339
    USetCodePointIterator(const USetCodePointIterator &other) = default;
1340
1341
    /** @stable ICU 76 */
1342
0
    bool operator==(const USetCodePointIterator &other) const {
1343
0
        // No need to compare rangeCount & end given private constructor
1344
0
        // and assuming we don't compare iterators across the set being modified.
1345
0
        // And comparing rangeIndex is redundant with comparing c.
1346
0
        // We might even skip comparing uset.
1347
0
        // Unless we want operator==() to be "correct" for more than iteration.
1348
0
        return uset == other.uset && c == other.c;
1349
0
    }
1350
1351
    /** @stable ICU 76 */
1352
0
    bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1353
1354
    /** @stable ICU 76 */
1355
0
    UChar32 operator*() const { return c; }
1356
1357
    /**
1358
     * Pre-increment.
1359
     * @stable ICU 76
1360
     */
1361
0
    USetCodePointIterator &operator++() {
1362
0
        if (c < end) {
1363
0
            ++c;
1364
0
        } else if (rangeIndex < rangeCount) {
1365
0
            UErrorCode errorCode = U_ZERO_ERROR;
1366
0
            int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1367
0
            if (U_SUCCESS(errorCode) && result == 0) {
1368
0
                ++rangeIndex;
1369
0
            } else {
1370
0
                c = end = U_SENTINEL;
1371
0
            }
1372
0
        } else {
1373
0
            c = end = U_SENTINEL;
1374
0
        }
1375
0
        return *this;
1376
0
    }
1377
1378
    /**
1379
     * Post-increment.
1380
     * @stable ICU 76
1381
     */
1382
0
    USetCodePointIterator operator++(int) {
1383
0
        USetCodePointIterator result(*this);
1384
0
        operator++();
1385
0
        return result;
1386
0
    }
1387
1388
private:
1389
    friend class USetCodePoints;
1390
1391
    USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1392
            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
1393
0
                c(U_SENTINEL), end(U_SENTINEL) {
1394
0
        // Fetch the first range.
1395
0
        operator++();
1396
0
    }
1397
1398
    const USet *uset;
1399
    int32_t rangeIndex;
1400
    int32_t rangeCount;
1401
    UChar32 c, end;
1402
};
1403
1404
/**
1405
 * C++ "range" for iterating over the code points of a USet.
1406
 *
1407
 * \code
1408
 * using U_HEADER_NESTED_NAMESPACE::USetCodePoints;
1409
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
1410
 * for (UChar32 c : USetCodePoints(uset.getAlias())) {
1411
 *     printf("uset.codePoint U+%04lx\n", (long)c);
1412
 * }
1413
 * \endcode
1414
 *
1415
 * C++ UnicodeSet has member functions for iteration, including codePoints().
1416
 *
1417
 * @stable ICU 76
1418
 * @see USetRanges
1419
 * @see USetStrings
1420
 * @see USetElements
1421
 */
1422
class USetCodePoints {
1423
public:
1424
    /**
1425
     * Constructs a C++ "range" object over the code points of the USet.
1426
     * @stable ICU 76
1427
     */
1428
0
    USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1429
1430
    /** @stable ICU 76 */
1431
    USetCodePoints(const USetCodePoints &other) = default;
1432
1433
    /** @stable ICU 76 */
1434
0
    USetCodePointIterator begin() const {
1435
0
        return USetCodePointIterator(uset, 0, rangeCount);
1436
0
    }
1437
1438
    /** @stable ICU 76 */
1439
0
    USetCodePointIterator end() const {
1440
0
        return USetCodePointIterator(uset, rangeCount, rangeCount);
1441
0
    }
1442
1443
private:
1444
    const USet *uset;
1445
    int32_t rangeCount;
1446
};
1447
1448
/**
1449
 * A contiguous range of code points in a USet/UnicodeSet.
1450
 * Returned by USetRangeIterator which is returned by USetRanges.
1451
 * Both the rangeStart and rangeEnd are in the range.
1452
 * (end() returns an iterator corresponding to rangeEnd+1.)
1453
 * @stable ICU 76
1454
 */
1455
struct CodePointRange {
1456
    /** @stable ICU 76 */
1457
    struct iterator {
1458
        /** @stable ICU 76 */
1459
0
        iterator(UChar32 aC) : c(aC) {}
1460
1461
        /** @stable ICU 76 */
1462
0
        bool operator==(const iterator &other) const { return c == other.c; }
1463
        /** @stable ICU 76 */
1464
0
        bool operator!=(const iterator &other) const { return !operator==(other); }
1465
1466
        /** @stable ICU 76 */
1467
0
        UChar32 operator*() const { return c; }
1468
1469
        /**
1470
         * Pre-increment.
1471
         * @stable ICU 76
1472
         */
1473
0
        iterator &operator++() {
1474
0
            ++c;
1475
0
            return *this;
1476
0
        }
1477
1478
        /**
1479
         * Post-increment.
1480
         * @stable ICU 76
1481
         */
1482
0
        iterator operator++(int) {
1483
0
            return c++;
1484
0
        }
1485
1486
        /**
1487
         * The current code point in the range.
1488
         * @stable ICU 76
1489
         */
1490
        UChar32 c;
1491
    };
1492
1493
    /** @stable ICU 76 */
1494
0
    CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
1495
    /** @stable ICU 76 */
1496
    CodePointRange(const CodePointRange &other) = default;
1497
    /** @stable ICU 76 */
1498
0
    size_t size() const { return (rangeEnd + 1) - rangeStart; }
1499
    /** @stable ICU 76 */
1500
0
    iterator begin() const { return rangeStart; }
1501
    /** @stable ICU 76 */
1502
0
    iterator end() const { return rangeEnd + 1; }
1503
1504
    /**
1505
     * Start of a USet/UnicodeSet range of code points.
1506
     * @stable ICU 76
1507
     */
1508
    UChar32 rangeStart;
1509
    /**
1510
     * Inclusive end of a USet/UnicodeSet range of code points.
1511
     * @stable ICU 76
1512
     */
1513
    UChar32 rangeEnd;
1514
};
1515
1516
/**
1517
 * Iterator returned by USetRanges.
1518
 * @stable ICU 76
1519
 */
1520
class USetRangeIterator {
1521
public:
1522
    /** @stable ICU 76 */
1523
    USetRangeIterator(const USetRangeIterator &other) = default;
1524
1525
    /** @stable ICU 76 */
1526
0
    bool operator==(const USetRangeIterator &other) const {
1527
0
        // No need to compare rangeCount given private constructor
1528
0
        // and assuming we don't compare iterators across the set being modified.
1529
0
        // We might even skip comparing uset.
1530
0
        // Unless we want operator==() to be "correct" for more than iteration.
1531
0
        return uset == other.uset && rangeIndex == other.rangeIndex;
1532
0
    }
1533
1534
    /** @stable ICU 76 */
1535
0
    bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1536
1537
    /** @stable ICU 76 */
1538
0
    CodePointRange operator*() const {
1539
0
        if (rangeIndex < rangeCount) {
1540
0
            UChar32 start, end;
1541
0
            UErrorCode errorCode = U_ZERO_ERROR;
1542
0
            int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1543
0
            if (U_SUCCESS(errorCode) && result == 0) {
1544
0
                return CodePointRange(start, end);
1545
0
            }
1546
0
        }
1547
0
        return CodePointRange(U_SENTINEL, U_SENTINEL);
1548
0
    }
1549
1550
    /**
1551
     * Pre-increment.
1552
     * @stable ICU 76
1553
     */
1554
0
    USetRangeIterator &operator++() {
1555
0
        ++rangeIndex;
1556
0
        return *this;
1557
0
    }
1558
1559
    /**
1560
     * Post-increment.
1561
     * @stable ICU 76
1562
     */
1563
0
    USetRangeIterator operator++(int) {
1564
0
        USetRangeIterator result(*this);
1565
0
        ++rangeIndex;
1566
0
        return result;
1567
0
    }
1568
1569
private:
1570
    friend class USetRanges;
1571
1572
    USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1573
0
            : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}
1574
1575
    const USet *uset;
1576
    int32_t rangeIndex;
1577
    int32_t rangeCount;
1578
};
1579
1580
/**
1581
 * C++ "range" for iterating over the code point ranges of a USet.
1582
 *
1583
 * \code
1584
 * using U_HEADER_NESTED_NAMESPACE::USetRanges;
1585
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
1586
 * for (auto [start, end] : USetRanges(uset.getAlias())) {
1587
 *     printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end);
1588
 * }
1589
 * for (auto range : USetRanges(uset.getAlias())) {
1590
 *     for (UChar32 c : range) {
1591
 *         printf("uset.range.c U+%04lx\n", (long)c);
1592
 *     }
1593
 * }
1594
 * \endcode
1595
 *
1596
 * C++ UnicodeSet has member functions for iteration, including ranges().
1597
 *
1598
 * @stable ICU 76
1599
 * @see USetCodePoints
1600
 * @see USetStrings
1601
 * @see USetElements
1602
 */
1603
class USetRanges {
1604
public:
1605
    /**
1606
     * Constructs a C++ "range" object over the code point ranges of the USet.
1607
     * @stable ICU 76
1608
     */
1609
0
    USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1610
1611
    /** @stable ICU 76 */
1612
    USetRanges(const USetRanges &other) = default;
1613
1614
    /** @stable ICU 76 */
1615
0
    USetRangeIterator begin() const {
1616
0
        return USetRangeIterator(uset, 0, rangeCount);
1617
0
    }
1618
1619
    /** @stable ICU 76 */
1620
0
    USetRangeIterator end() const {
1621
0
        return USetRangeIterator(uset, rangeCount, rangeCount);
1622
0
    }
1623
1624
private:
1625
    const USet *uset;
1626
    int32_t rangeCount;
1627
};
1628
1629
/**
1630
 * Iterator returned by USetStrings.
1631
 * @stable ICU 76
1632
 */
1633
class USetStringIterator {
1634
public:
1635
    /** @stable ICU 76 */
1636
    USetStringIterator(const USetStringIterator &other) = default;
1637
1638
    /** @stable ICU 76 */
1639
0
    bool operator==(const USetStringIterator &other) const {
1640
0
        // No need to compare count given private constructor
1641
0
        // and assuming we don't compare iterators across the set being modified.
1642
0
        // We might even skip comparing uset.
1643
0
        // Unless we want operator==() to be "correct" for more than iteration.
1644
0
        return uset == other.uset && index == other.index;
1645
0
    }
1646
1647
    /** @stable ICU 76 */
1648
0
    bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1649
1650
    /** @stable ICU 76 */
1651
0
    std::u16string_view operator*() const {
1652
0
        if (index < count) {
1653
0
            int32_t length;
1654
0
            const UChar *uchars = uset_getString(uset, index, &length);
1655
0
            // assert uchars != nullptr;
1656
0
            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1657
0
        }
1658
0
        return {};
1659
0
    }
1660
1661
    /**
1662
     * Pre-increment.
1663
     * @stable ICU 76
1664
     */
1665
0
    USetStringIterator &operator++() {
1666
0
        ++index;
1667
0
        return *this;
1668
0
    }
1669
1670
    /**
1671
     * Post-increment.
1672
     * @stable ICU 76
1673
     */
1674
0
    USetStringIterator operator++(int) {
1675
0
        USetStringIterator result(*this);
1676
0
        ++index;
1677
0
        return result;
1678
0
    }
1679
1680
private:
1681
    friend class USetStrings;
1682
1683
    USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
1684
0
            : uset(pUset), index(nIndex), count(nCount) {}
1685
1686
    const USet *uset;
1687
    int32_t index;
1688
    int32_t count;
1689
};
1690
1691
/**
1692
 * C++ "range" for iterating over the empty and multi-character strings of a USet.
1693
 *
1694
 * \code
1695
 * using U_HEADER_NESTED_NAMESPACE::USetStrings;
1696
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
1697
 * for (auto s : USetStrings(uset.getAlias())) {
1698
 *     int32_t len32 = s.length();
1699
 *     char utf8[200];
1700
 *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
1701
 *                        s.data(), len32, 0xFFFD, nullptr, &errorCode);
1702
 *     printf("uset.string length %ld \"%s\"\n", long{len32}, utf8);
1703
 * }
1704
 * \endcode
1705
 *
1706
 * C++ UnicodeSet has member functions for iteration, including strings().
1707
 *
1708
 * @stable ICU 76
1709
 * @see USetCodePoints
1710
 * @see USetRanges
1711
 * @see USetElements
1712
 */
1713
class USetStrings {
1714
public:
1715
    /**
1716
     * Constructs a C++ "range" object over the strings of the USet.
1717
     * @stable ICU 76
1718
     */
1719
0
    USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
1720
1721
    /** @stable ICU 76 */
1722
    USetStrings(const USetStrings &other) = default;
1723
1724
    /** @stable ICU 76 */
1725
0
    USetStringIterator begin() const {
1726
0
        return USetStringIterator(uset, 0, count);
1727
0
    }
1728
1729
    /** @stable ICU 76 */
1730
0
    USetStringIterator end() const {
1731
0
        return USetStringIterator(uset, count, count);
1732
0
    }
1733
1734
private:
1735
    const USet *uset;
1736
    int32_t count;
1737
};
1738
1739
#ifndef U_HIDE_DRAFT_API
1740
/**
1741
 * Iterator returned by USetElements.
1742
 * @draft ICU 77
1743
 */
1744
class USetElementIterator {
1745
public:
1746
    /** @draft ICU 77 */
1747
    USetElementIterator(const USetElementIterator &other) = default;
1748
1749
    /** @draft ICU 77 */
1750
0
    bool operator==(const USetElementIterator &other) const {
1751
0
        // No need to compare rangeCount & end given private constructor
1752
0
        // and assuming we don't compare iterators across the set being modified.
1753
0
        // We might even skip comparing uset.
1754
0
        // Unless we want operator==() to be "correct" for more than iteration.
1755
0
        return uset == other.uset && c == other.c && index == other.index;
1756
0
    }
1757
1758
    /** @draft ICU 77 */
1759
0
    bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1760
1761
    /** @draft ICU 77 */
1762
0
    std::u16string operator*() const {
1763
0
        if (c >= 0) {
1764
0
            return c <= 0xffff ?
1765
0
                std::u16string({static_cast<char16_t>(c)}) :
1766
0
                std::u16string({U16_LEAD(c), U16_TRAIL(c)});
1767
0
        } else if (index < totalCount) {
1768
0
            int32_t length;
1769
0
            const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1770
0
            // assert uchars != nullptr;
1771
0
            return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1772
0
        } else {
1773
0
            return {};
1774
0
        }
1775
0
    }
1776
1777
    /**
1778
     * Pre-increment.
1779
     * @draft ICU 77
1780
     */
1781
0
    USetElementIterator &operator++() {
1782
0
        if (c < end) {
1783
0
            ++c;
1784
0
        } else if (index < rangeCount) {
1785
0
            UErrorCode errorCode = U_ZERO_ERROR;
1786
0
            int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1787
0
            if (U_SUCCESS(errorCode) && result == 0) {
1788
0
                ++index;
1789
0
            } else {
1790
0
                c = end = U_SENTINEL;
1791
0
            }
1792
0
        } else if (c >= 0) {
1793
0
            // assert index == rangeCount;
1794
0
            // Switch from the last range to the first string.
1795
0
            c = end = U_SENTINEL;
1796
0
        } else {
1797
0
            ++index;
1798
0
        }
1799
0
        return *this;
1800
0
    }
1801
1802
    /**
1803
     * Post-increment.
1804
     * @draft ICU 77
1805
     */
1806
0
    USetElementIterator operator++(int) {
1807
0
        USetElementIterator result(*this);
1808
0
        operator++();
1809
0
        return result;
1810
0
    }
1811
1812
private:
1813
    friend class USetElements;
1814
1815
    USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
1816
            : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
1817
0
                c(U_SENTINEL), end(U_SENTINEL) {
1818
0
        if (index < rangeCount) {
1819
0
            // Fetch the first range.
1820
0
            operator++();
1821
0
        }
1822
0
        // Otherwise don't move beyond the (index - rangeCount)-th string.
1823
0
    }
1824
1825
    const USet *uset;
1826
    int32_t index;
1827
    /** Number of UnicodeSet/USet code point ranges. */
1828
    int32_t rangeCount;
1829
    /**
1830
     * Number of code point ranges plus number of strings.
1831
     * index starts from 0, counts ranges while less than rangeCount,
1832
     * then counts strings while at least rangeCount and less than totalCount.
1833
     *
1834
     * Note that totalCount is the same as uset_getItemCount(), but usually
1835
     * smaller than the number of elements returned by this iterator
1836
     * because we return each code point of each range.
1837
     */
1838
    int32_t totalCount;
1839
    UChar32 c, end;
1840
};
1841
1842
/**
1843
 * A C++ "range" for iterating over all of the elements of a USet.
1844
 * Convenient all-in one iteration, but creates a std::u16string for each
1845
 * code point or string.
1846
 *
1847
 * Code points are returned first, then empty and multi-character strings.
1848
 *
1849
 * \code
1850
 * using U_HEADER_NESTED_NAMESPACE::USetElements;
1851
 * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
1852
 * for (auto el : USetElements(uset.getAlias())) {
1853
 *     int32_t len32 = el.length();
1854
 *     char utf8[200];
1855
 *     u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
1856
 *                        el.data(), len32, 0xFFFD, nullptr, &errorCode);
1857
 *     printf("uset.element length %ld \"%s\"\n", long{len32}, utf8);
1858
 * }
1859
 * \endcode
1860
 *
1861
 * C++ UnicodeSet has member functions for iteration, including begin() and end().
1862
 *
1863
 * @return an all-elements iterator.
1864
 * @draft ICU 77
1865
 * @see USetCodePoints
1866
 * @see USetRanges
1867
 * @see USetStrings
1868
 */
1869
class USetElements {
1870
public:
1871
    /**
1872
     * Constructs a C++ "range" object over all of the elements of the USet.
1873
     * @draft ICU 77
1874
     */
1875
    USetElements(const USet *pUset)
1876
        : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
1877
0
            stringCount(uset_getStringCount(pUset)) {}
1878
1879
    /** @draft ICU 77 */
1880
    USetElements(const USetElements &other) = default;
1881
1882
    /** @draft ICU 77 */
1883
0
    USetElementIterator begin() const {
1884
0
        return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1885
0
    }
1886
1887
    /** @draft ICU 77 */
1888
0
    USetElementIterator end() const {
1889
0
        return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1890
0
    }
1891
1892
private:
1893
    const USet *uset;
1894
    int32_t rangeCount, stringCount;
1895
};
1896
1897
#endif  // U_HIDE_DRAFT_API
1898
1899
}  // namespace U_HEADER_ONLY_NAMESPACE
1900
1901
#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1902
1903
#endif  // __USET_H__