Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/unicode/usetiter.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2002-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
#ifndef USETITER_H
10
#define USETITER_H
11
12
#include "unicode/utypes.h"
13
14
#if U_SHOW_CPLUSPLUS_API
15
16
#include "unicode/uobject.h"
17
#include "unicode/unistr.h"
18
19
/**
20
 * \file 
21
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
22
 */
23
24
U_NAMESPACE_BEGIN
25
26
class UnicodeSet;
27
class UnicodeString;
28
29
/**
30
 *
31
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
32
 * iterates over either code points or code point ranges.  After all
33
 * code points or ranges have been returned, it returns the
34
 * multicharacter strings of the UnicodeSet, if any.
35
 *
36
 * This class is not intended for public subclassing.
37
 *
38
 * <p>To iterate over code points and strings, use a loop like this:
39
 * <pre>
40
 * UnicodeSetIterator it(set);
41
 * while (it.next()) {
42
 *     processItem(it.getString());
43
 * }
44
 * </pre>
45
 * <p>Each item in the set is accessed as a string.  Set elements
46
 *    consisting of single code points are returned as strings containing
47
 *    just the one code point.
48
 *
49
 * <p>To iterate over code point ranges, instead of individual code points,
50
 *    use a loop like this:
51
 * <pre>
52
 * UnicodeSetIterator it(set);
53
 * while (it.nextRange()) {
54
 *   if (it.isString()) {
55
 *     processString(it.getString());
56
 *   } else {
57
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
58
 *   }
59
 * }
60
 * </pre>
61
 *
62
 * To iterate over only the strings, start with <code>skipToStrings()</code>.
63
 *
64
 * @author M. Davis
65
 * @stable ICU 2.4
66
 */
67
class U_COMMON_API UnicodeSetIterator U_FINAL : public UObject {
68
    /**
69
     * Value of <tt>codepoint</tt> if the iterator points to a string.
70
     * If <tt>codepoint == IS_STRING</tt>, then examine
71
     * <tt>string</tt> for the current iteration result.
72
     * (Declared before the first access specifier, so it is private.)
     */
73
    enum { IS_STRING = -1 };
74
75
    /**
76
     * Current code point, or the special value <tt>IS_STRING</tt>, if
77
     * the iterator points to a string.
78
     */
79
    UChar32 codepoint;
80
81
    /**
82
     * When iterating over ranges using <tt>nextRange()</tt>,
83
     * <tt>codepointEnd</tt> contains the inclusive end of the
84
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
85
     * iterating over code points using <tt>next()</tt>, or if
86
     * <tt>codepoint == IS_STRING</tt>, then the value of
87
     * <tt>codepointEnd</tt> is undefined.
88
     */
89
    UChar32 codepointEnd;
90
91
    /**
92
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
93
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
94
     * value of <tt>string</tt> is undefined.
95
     */
96
    const UnicodeString* string;
97
98
 public:
99
100
    /**
101
     * Create an iterator over the given set.  The iterator is valid
102
     * only so long as <tt>set</tt> is valid.
103
     * @param set set to iterate over
104
     * @stable ICU 2.4
105
     */
106
    UnicodeSetIterator(const UnicodeSet& set);
107
108
    /**
109
     * Create an iterator over nothing.  <tt>next()</tt> and
110
     * <tt>nextRange()</tt> return false. This is a convenience
111
     * constructor allowing the target to be set later.
112
     * @stable ICU 2.4
113
     */
114
    UnicodeSetIterator();
115
116
    /**
117
     * Destructor.
118
     * @stable ICU 2.4
119
     */
120
    virtual ~UnicodeSetIterator();
121
122
    /**
123
     * Returns true if the current element is a string.  If so, the
124
     * caller can retrieve it with <tt>getString()</tt>.  If this
125
     * method returns false, the current element is a code point or
126
     * code point range, depending on whether <tt>next()</tt> or
127
     * <tt>nextRange()</tt> was called.
128
     * Elements of types string and codepoint can both be retrieved
129
     * with the function <tt>getString()</tt>.
130
     * Elements of type codepoint can also be retrieved with
131
     * <tt>getCodepoint()</tt>.
132
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
133
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
134
     * of the range.
135
     * @stable ICU 2.4
136
     */
137
    inline UBool isString() const;
138
139
    /**
140
     * Returns the current code point, if <tt>isString()</tt> returned
141
     * false.  Otherwise returns an undefined result.
142
     * @stable ICU 2.4
143
     */
144
    inline UChar32 getCodepoint() const;
145
146
    /**
147
     * Returns the end of the current code point range, if
148
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
149
     * called.  Otherwise returns an undefined result.
150
     * @stable ICU 2.4
151
     */
152
    inline UChar32 getCodepointEnd() const;
153
154
    /**
155
     * Returns the current string, if <tt>isString()</tt> returned
156
     * true.  If the current iteration item is a code point, a UnicodeString
157
     * containing that single code point is returned.
158
     *
159
     * Ownership of the returned string remains with the iterator.
160
     * The string is guaranteed to remain valid only until the iterator is
161
     *   advanced to the next item, or until the iterator is deleted.
162
     *
163
     * @stable ICU 2.4
164
     */
165
    const UnicodeString& getString();
166
167
#ifndef U_HIDE_DRAFT_API
168
    /**
169
     * Skips over the remaining code points/ranges, if any.
170
     * A following call to next() or nextRange() will yield a string, if there is one.
171
     * No-op if next() would return false, or if it would yield a string anyway.
172
     *
173
     * @return *this
174
     * @draft ICU 70
175
     * @see UnicodeSet#strings()
176
     */
177
0
    inline UnicodeSetIterator &skipToStrings() {
178
0
        // Finish code point/range iteration.
        // The range cursor is moved to endRange and the pending element span is
        // made empty (nextElement 0 is greater than endElement -1).  The string
        // cursor (nextString) is deliberately left untouched.
        // NOTE(review): next()/nextRange() are defined outside this header;
        // presumably they treat this state as "code points exhausted" and move
        // on to the strings -- confirm against the implementation.
179
0
        range = endRange;
180
0
        endElement = -1;
181
0
        nextElement = 0;
182
0
        return *this;
183
0
    }
184
#endif  // U_HIDE_DRAFT_API
185
186
    /**
187
     * Advances the iteration position to the next element in the set,
188
     * which can be either a single code point or a string.
189
     * If there are no more elements in the set, return false.
190
     *
191
     * <p>
192
     * If <tt>isString() == true</tt>, the value is a
193
     * string, otherwise the value is a
194
     * single code point.  Elements of either type can be retrieved
195
     * with the function <tt>getString()</tt>, while elements
196
     * consisting of a single code point can be retrieved with
197
     * <tt>getCodepoint()</tt>.
198
     *
199
     * <p>The order of iteration is all code points in sorted order,
200
     * followed by all strings in sorted order.    Do not mix
201
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
202
     * calling <tt>reset()</tt> between them.  The results of doing so
203
     * are undefined.
204
     *
205
     * @return true if there was another element in the set.
206
     * @stable ICU 2.4
207
     */
208
    UBool next();
209
210
    /**
211
     * Returns the next element in the set, either a code point range
212
     * or a string.  If there are no more elements in the set, return
213
     * false.  If <tt>isString() == true</tt>, the value is a
214
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
215
     * range of one or more code points from <tt>getCodepoint()</tt> to
216
     * <tt>getCodepointEnd()</tt> inclusive.
217
     *
218
     * <p>The order of iteration is all code point ranges in sorted
219
     * order, followed by all strings in sorted order.  Ranges are
220
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
221
     * is undefined unless <tt>isString() == true</tt>.  Do not mix calls to
222
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
223
     * <tt>reset()</tt> between them.  The results of doing so are
224
     * undefined.
225
     *
226
     * @return true if there was another element in the set.
227
     * @stable ICU 2.4
228
     */
229
    UBool nextRange();
230
231
    /**
232
     * Sets this iterator to visit the elements of the given set and
233
     * resets it to the start of that set.  The iterator is valid only
234
     * so long as <tt>set</tt> is valid.
235
     * @param set the set to iterate over.
236
     * @stable ICU 2.4
237
     */
238
    void reset(const UnicodeSet& set);
239
240
    /**
241
     * Resets this iterator to the start of the set.
242
     * @stable ICU 2.4
243
     */
244
    void reset();
245
246
    /**
247
     * ICU "poor man's RTTI", returns a UClassID for this class.
248
     *
249
     * @stable ICU 2.4
250
     */
251
    static UClassID U_EXPORT2 getStaticClassID();
252
253
    /**
254
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
255
     *
256
     * @stable ICU 2.4
257
     */
258
    virtual UClassID getDynamicClassID() const;
259
260
    // ======================= PRIVATES ===========================
261
262
private:
263
264
    // endElement and nextElement are really UChar32's, but we keep
265
    // them as signed int32_t's so we can do comparisons with
266
    // endElement set to -1.  Leave them as int32_t's.
267
    /** The set being iterated over; per the constructor contract it must stay valid while the iterator is used.
268
     */
269
    const UnicodeSet* set;
270
    /** End range; skipToStrings() moves <tt>range</tt> to this value to finish code point iteration.
271
     */
272
    int32_t endRange;
273
    /** Range: the current position among the set's code point ranges.
274
     */
275
    int32_t range;
276
    /** End element of the span being iterated; set to -1 by skipToStrings() (see the int32_t note above).
277
     */
278
    int32_t endElement;
279
    /** Next element; set to 0 by skipToStrings().
280
     */
281
    int32_t nextElement;
282
    /** Next string. NOTE(review): presumably the index of the next string to return -- confirm.
283
     */
284
    int32_t nextString;
285
    /** String count. NOTE(review): presumably the number of strings in the set -- confirm.
286
     */
287
    int32_t stringCount;
288
289
    /**
290
     *  Points to the string to use when the caller asks for a
291
     *  string and the current iteration item is a code point, not a string.
292
     */
293
    UnicodeString *cpString;
294
295
    /** Copy constructor. Disallowed.
296
     */
297
    UnicodeSetIterator(const UnicodeSetIterator&) = delete;
298
299
    /** Assignment operator. Disallowed.
300
     */
301
    UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
302
303
    /** Load range.
304
     *  @param range the range to load. NOTE(review): presumably an index into the set's ranges -- confirm.
     */
305
    void loadRange(int32_t range);
306
};
307
308
0
/**
 * Tells whether the current iteration item is a string.
 * The test is simply whether the stored code point value is negative.
 */
inline UBool UnicodeSetIterator::isString() const {
    const UBool pointsAtString = (codepoint < 0);
    return pointsAtString;
}
311
312
0
/**
 * Hands back the stored current code point value unchanged.
 * No check is made here that the current item is not a string.
 */
inline UChar32 UnicodeSetIterator::getCodepoint() const {
    const UChar32 current = this->codepoint;
    return current;
}
315
316
0
/**
 * Hands back the stored end-of-range code point value unchanged.
 * No check is made here that a range is currently being iterated.
 */
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
    const UChar32 rangeEnd = this->codepointEnd;
    return rangeEnd;
}
319
320
321
U_NAMESPACE_END
322
323
#endif /* U_SHOW_CPLUSPLUS_API */
324
325
#endif