Coverage Report

Created: 2025-10-24 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/unicode/usetiter.h
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2002-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
#ifndef USETITER_H
10
#define USETITER_H
11
12
#include "unicode/utypes.h"
13
14
#if U_SHOW_CPLUSPLUS_API
15
16
#include "unicode/uobject.h"
17
#include "unicode/unistr.h"
18
19
/**
20
 * \file 
21
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
22
 */
23
24
U_NAMESPACE_BEGIN
25
26
class UnicodeSet;
27
class UnicodeString;
28
29
/**
30
 *
31
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
32
 * iterates over either code points or code point ranges.  After all
33
 * code points or ranges have been returned, it returns the
34
 * multicharacter strings of the UnicodeSet, if any.
35
 *
36
 * This class is not intended for public subclassing.
37
 *
38
 * <p>To iterate over code points and strings, use a loop like this:
39
 * <pre>
40
 * UnicodeSetIterator it(set);
41
 * while (it.next()) {
42
 *     processItem(it.getString());
43
 * }
44
 * </pre>
45
 * <p>Each item in the set is accessed as a string.  Set elements
46
 *    consisting of single code points are returned as strings containing
47
 *    just the one code point.
48
 *
49
 * <p>To iterate over code point ranges, instead of individual code points,
50
 *    use a loop like this:
51
 * <pre>
52
 * UnicodeSetIterator it(set);
53
 * while (it.nextRange()) {
54
 *   if (it.isString()) {
55
 *     processString(it.getString());
56
 *   } else {
57
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
58
 *   }
59
 * }
60
 * </pre>
61
 *
62
 * To iterate over only the strings, start with <code>skipToStrings()</code>.
63
 *
64
 * @author M. Davis
65
 * @stable ICU 2.4
66
 */
67
class U_COMMON_API UnicodeSetIterator final : public UObject {
68
    /**
69
     * Value of <tt>codepoint</tt> if the iterator points to a string.
70
     * If <tt>codepoint == IS_STRING</tt>, then examine
71
     * <tt>string</tt> for the current iteration result.
72
     */
73
    enum { IS_STRING = -1 };
74
75
    /**
76
     * Current code point, or the special value <tt>IS_STRING</tt>, if
77
     * the iterator points to a string.
78
     */
79
    UChar32 codepoint;
80
81
    /**
82
     * When iterating over ranges using <tt>nextRange()</tt>,
83
     * <tt>codepointEnd</tt> contains the inclusive end of the
84
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
85
     * iterating over code points using <tt>next()</tt>, or if
86
     * <tt>codepoint == IS_STRING</tt>, then the value of
87
     * <tt>codepointEnd</tt> is undefined.
88
     */
89
    UChar32 codepointEnd;
90
91
    /**
92
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
93
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
94
     * value of <tt>string</tt> is undefined.
95
     */
96
    const UnicodeString* string;
97
98
 public:
99
100
    /**
101
     * Create an iterator over the given set.  The iterator is valid
102
     * only so long as <tt>set</tt> is valid.
103
     * @param set set to iterate over
104
     * @stable ICU 2.4
105
     */
106
    UnicodeSetIterator(const UnicodeSet& set);
107
108
    /**
109
     * Create an iterator over nothing.  <tt>next()</tt> and
110
     * <tt>nextRange()</tt> return false. This is a convenience
111
     * constructor allowing the target to be set later.
112
     * @stable ICU 2.4
113
     */
114
    UnicodeSetIterator();
115
116
    /**
117
     * Destructor.
118
     * @stable ICU 2.4
119
     */
120
    virtual ~UnicodeSetIterator();
121
122
    /**
123
     * Returns true if the current element is a string.  If so, the
124
     * caller can retrieve it with <tt>getString()</tt>.  If this
125
     * method returns false, the current element is a code point or
126
     * code point range, depending on whether <tt>next()</tt> or
127
     * <tt>nextRange()</tt> was called.
128
     * Elements of types string and codepoint can both be retrieved
129
     * with the function <tt>getString()</tt>.
130
     * Elements of type codepoint can also be retrieved with
131
     * <tt>getCodepoint()</tt>.
132
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
133
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
134
     * of the range.
135
     * @stable ICU 2.4
136
     */
137
    inline UBool isString() const;
138
139
    /**
140
     * Returns the current code point, if <tt>isString()</tt> returned
141
     * false.  Otherwise returns an undefined result.
142
     * @stable ICU 2.4
143
     */
144
    inline UChar32 getCodepoint() const;
145
146
    /**
147
     * Returns the end of the current code point range, if
148
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
149
     * called.  Otherwise returns an undefined result.
150
     * @stable ICU 2.4
151
     */
152
    inline UChar32 getCodepointEnd() const;
153
154
    /**
155
     * Returns the current string, if <tt>isString()</tt> returned
156
     * true.  If the current iteration item is a code point, a UnicodeString
157
     * containing that single code point is returned.
158
     *
159
     * Ownership of the returned string remains with the iterator.
160
     * The string is guaranteed to remain valid only until the iterator is
161
     *   advanced to the next item, or until the iterator is deleted.
162
     * 
163
     * @stable ICU 2.4
164
     */
165
    const UnicodeString& getString();
166
167
    /**
168
     * Skips over the remaining code points/ranges, if any.
169
     * A following call to next() or nextRange() will yield a string, if there is one.
170
     * No-op if next() would return false, or if it would yield a string anyway.
171
     *
172
     * @return *this
173
     * @stable ICU 70
174
     * @see UnicodeSet#strings()
175
     */
176
0
    inline UnicodeSetIterator &skipToStrings() {
177
0
        // Finish code point/range iteration; nextString and stringCount are left untouched.
178
0
        range = endRange;  // move the range cursor to its end value
179
0
        endElement = -1;  // together with nextElement = 0 below, nextElement > endElement
180
0
        nextElement = 0;  // NOTE(review): presumably next()/nextRange() read this as "no code points left" - their bodies are not in this header, confirm there
181
0
        return *this;  // hand back this iterator, as the doc comment promises
182
0
    }
183
184
    /**
185
     * Advances the iteration position to the next element in the set, 
186
     * which can be either a single code point or a string.  
187
     * If there are no more elements in the set, return false.
188
     *
189
     * <p>
190
     * If <tt>isString() == true</tt>, the value is a
191
     * string, otherwise the value is a
192
     * single code point.  Elements of either type can be retrieved
193
     * with the function <tt>getString()</tt>, while elements
194
     * consisting of a single code point can be retrieved with
195
     * <tt>getCodepoint()</tt>.
196
     *
197
     * <p>The order of iteration is all code points in sorted order,
198
     * followed by all strings in sorted order.  Do not mix
199
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
200
     * calling <tt>reset()</tt> between them.  The results of doing so
201
     * are undefined.
202
     *
203
     * @return true if there was another element in the set.
204
     * @stable ICU 2.4
205
     */
206
    UBool next();
207
208
    /**
209
     * Returns the next element in the set, either a code point range
210
     * or a string.  If there are no more elements in the set, return
211
     * false.  If <tt>isString() == true</tt>, the value is a
212
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
213
     * range of one or more code points from <tt>getCodepoint()</tt> to
214
     * <tt>getCodepointEnd()</tt> inclusive.
215
     *
216
     * <p>The order of iteration is all code point ranges in sorted
217
     * order, followed by all strings in sorted order.  Ranges are
218
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
219
     * is undefined unless <tt>isString() == true</tt>.  Do not mix calls to
220
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
221
     * <tt>reset()</tt> between them.  The results of doing so are
222
     * undefined.
223
     *
224
     * @return true if there was another element in the set.
225
     * @stable ICU 2.4
226
     */
227
    UBool nextRange();
228
229
    /**
230
     * Sets this iterator to visit the elements of the given set and
231
     * resets it to the start of that set.  The iterator is valid only
232
     * so long as <tt>set</tt> is valid.
233
     * @param set the set to iterate over.
234
     * @stable ICU 2.4
235
     */
236
    void reset(const UnicodeSet& set);
237
238
    /**
239
     * Resets this iterator to the start of the set.
240
     * @stable ICU 2.4
241
     */
242
    void reset();
243
244
    /**
245
     * ICU "poor man's RTTI", returns a UClassID for this class.
246
     *
247
     * @stable ICU 2.4
248
     */
249
    static UClassID U_EXPORT2 getStaticClassID();
250
251
    /**
252
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
253
     *
254
     * @stable ICU 2.4
255
     */
256
    virtual UClassID getDynamicClassID() const override;
257
258
    // ======================= PRIVATES ===========================
259
260
private:
261
262
    // endElement and nextElement are really UChar32's, but we keep
263
    // them as signed int32_t's so we can do comparisons with
264
    // endElement set to -1.  Leave them as int32_t's.
265
    /** The set
266
     */
267
    const UnicodeSet* set;
268
    /** End range
269
     */
270
    int32_t endRange;
271
    /** Range
272
     */
273
    int32_t range;
274
    /** End element
275
     */
276
    int32_t endElement;
277
    /** Next element
278
     */
279
    int32_t nextElement;
280
    /** Next string
281
     */
282
    int32_t nextString;
283
    /** String count
284
     */
285
    int32_t stringCount;
286
287
    /**
288
     *  Points to the string to use when the caller asks for a
289
     *  string and the current iteration item is a code point, not a string.
290
     */
291
    UnicodeString *cpString;
292
293
    /** Copy constructor. Disallowed.
294
     */
295
    UnicodeSetIterator(const UnicodeSetIterator&) = delete;
296
297
    /** Assignment operator. Disallowed.
298
     */
299
    UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
300
301
    /** Load range
302
     */
303
    void loadRange(int32_t range);
304
};
305
306
2.56M
inline UBool UnicodeSetIterator::isString() const {
307
2.56M
    return codepoint < 0;  // codepoint holds IS_STRING (-1) for a string element; any negative value tests true
308
2.56M
}
309
310
45.5M
inline UChar32 UnicodeSetIterator::getCodepoint() const {
311
45.5M
    return codepoint;  // raw member, no check: this is IS_STRING (-1) when the current element is a string
312
45.5M
}
313
314
0
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
315
0
    return codepointEnd;  // inclusive range end; documented as undefined unless nextRange() yielded a code point range
316
0
}
317
318
319
U_NAMESPACE_END
320
321
#endif /* U_SHOW_CPLUSPLUS_API */
322
323
#endif