Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/unicode/usetiter.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2002-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
#ifndef USETITER_H
10
#define USETITER_H
11
12
#include "unicode/utypes.h"
13
#include "unicode/uobject.h"
14
#include "unicode/unistr.h"
15
16
/**
17
 * \file 
18
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
19
 */
20
21
U_NAMESPACE_BEGIN
22
23
class UnicodeSet;
24
class UnicodeString;
25
26
/**
27
 *
28
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
29
 * iterates over either code points or code point ranges.  After all
30
 * code points or ranges have been returned, it returns the
31
 * multicharacter strings of the UnicodeSet, if any.
32
 *
33
 * This class is not intended to be subclassed.  Consider any fields
34
 *  or methods declared as "protected" to be private.  The use of
35
 *  protected in this class is an artifact of history.
36
 *
37
 * <p>To iterate over code points and strings, use a loop like this:
38
 * <pre>
39
 * UnicodeSetIterator it(set);
40
 * while (it.next()) {
41
 *     processItem(it.getString());
42
 * }
43
 * </pre>
44
 * <p>Each item in the set is accessed as a string.  Set elements
45
 *    consisting of single code points are returned as strings containing
46
 *    just the one code point.
47
 *
48
 * <p>To iterate over code point ranges, instead of individual code points,
49
 *    use a loop like this:
50
 * <pre>
51
 * UnicodeSetIterator it(set);
52
 * while (it.nextRange()) {
53
 *   if (it.isString()) {
54
 *     processString(it.getString());
55
 *   } else {
56
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
57
 *   }
58
 * }
59
 * </pre>
60
 * @author M. Davis
61
 * @stable ICU 2.4
62
 */
63
class U_COMMON_API UnicodeSetIterator : public UObject {
64
65
 protected:
66
67
    /**
68
     * Value of <tt>codepoint</tt> if the iterator points to a string.
69
     * If <tt>codepoint == IS_STRING</tt>, then examine
70
     * <tt>string</tt> for the current iteration result.
71
     * @stable ICU 2.4
72
     */
73
    enum { IS_STRING = -1 };
74
75
    /**
76
     * Current code point, or the special value <tt>IS_STRING</tt>, if
77
     * the iterator points to a string.
78
     * @stable ICU 2.4
79
     */
80
    UChar32 codepoint;
81
82
    /**
83
     * When iterating over ranges using <tt>nextRange()</tt>,
84
     * <tt>codepointEnd</tt> contains the inclusive end of the
85
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
86
     * iterating over code points using <tt>next()</tt>, or if
87
     * <tt>codepoint == IS_STRING</tt>, then the value of
88
     * <tt>codepointEnd</tt> is undefined.
89
     * @stable ICU 2.4
90
     */
91
    UChar32 codepointEnd;
92
93
    /**
94
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
95
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
96
     * value of <tt>string</tt> is undefined.
97
     * @stable ICU 2.4
98
     */
99
    const UnicodeString* string;
100
101
 public:
102
103
    /**
104
     * Create an iterator over the given set.  The iterator is valid
105
     * only so long as <tt>set</tt> is valid.
106
     * @param set set to iterate over
107
     * @stable ICU 2.4
108
     */
109
    UnicodeSetIterator(const UnicodeSet& set);
110
111
    /**
112
     * Create an iterator over nothing.  <tt>next()</tt> and
113
     * <tt>nextRange()</tt> return false. This is a convenience
114
     * constructor allowing the target to be set later.
115
     * @stable ICU 2.4
116
     */
117
    UnicodeSetIterator();
118
119
    /**
120
     * Destructor.
121
     * @stable ICU 2.4
122
     */
123
    virtual ~UnicodeSetIterator();
124
125
    /**
126
     * Returns true if the current element is a string.  If so, the
127
     * caller can retrieve it with <tt>getString()</tt>.  If this
128
     * method returns false, the current element is a code point or
129
     * code point range, depending on whether <tt>next()</tt> or
130
     * <tt>nextRange()</tt> was called.
131
     * Elements of types string and codepoint can both be retrieved
132
     * with the function <tt>getString()</tt>.
133
     * Elements of type codepoint can also be retrieved with
134
     * <tt>getCodepoint()</tt>.
135
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
136
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
137
     * of the range.
138
     * @stable ICU 2.4
139
     */
140
    inline UBool isString() const;
141
142
    /**
143
     * Returns the current code point, if <tt>isString()</tt> returned
144
     * false.  Otherwise returns an undefined result.
145
     * @stable ICU 2.4
146
     */
147
    inline UChar32 getCodepoint() const;
148
149
    /**
150
     * Returns the end of the current code point range, if
151
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
152
     * called.  Otherwise returns an undefined result.
153
     * @stable ICU 2.4
154
     */
155
    inline UChar32 getCodepointEnd() const;
156
157
    /**
158
     * Returns the current string, if <tt>isString()</tt> returned
159
     * true.  If the current iteration item is a code point, a UnicodeString
160
     * containing that single code point is returned.
161
     *
162
     * Ownership of the returned string remains with the iterator.
163
     * The string is guaranteed to remain valid only until the iterator is
164
     *   advanced to the next item, or until the iterator is deleted.
165
     * 
166
     * @stable ICU 2.4
167
     */
168
    const UnicodeString& getString();
169
170
    /**
171
     * Advances the iteration position to the next element in the set, 
172
     * which can be either a single code point or a string.  
173
     * If there are no more elements in the set, return false.
174
     *
175
     * <p>
176
     * If <tt>isString() == TRUE</tt>, the value is a
177
     * string, otherwise the value is a
178
     * single code point.  Elements of either type can be retrieved
179
     * with the function <tt>getString()</tt>, while elements
180
     * consisting of a single code point can be retrieved with
181
     * <tt>getCodepoint()</tt>.
182
     *
183
     * <p>The order of iteration is all code points in sorted order,
184
     * followed by all strings in sorted order.  Do not mix
185
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
186
     * calling <tt>reset()</tt> between them.  The results of doing so
187
     * are undefined.
188
     *
189
     * @return true if there was another element in the set.
190
     * @stable ICU 2.4
191
     */
192
    UBool next();
193
194
    /**
195
     * Returns the next element in the set, either a code point range
196
     * or a string.  If there are no more elements in the set, return
197
     * false.  If <tt>isString() == TRUE</tt>, the value is a
198
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
199
     * range of one or more code points from <tt>getCodepoint()</tt> to
200
     * <tt>getCodepointEnd()</tt> inclusive.
201
     *
202
     * <p>The order of iteration is all code point ranges in sorted
203
     * order, followed by all strings in sorted order.  Ranges are
204
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
205
     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
206
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
207
     * <tt>reset()</tt> between them.  The results of doing so are
208
     * undefined.
209
     *
210
     * @return true if there was another element in the set.
211
     * @stable ICU 2.4
212
     */
213
    UBool nextRange();
214
215
    /**
216
     * Sets this iterator to visit the elements of the given set and
217
     * resets it to the start of that set.  The iterator is valid only
218
     * so long as <tt>set</tt> is valid.
219
     * @param set the set to iterate over.
220
     * @stable ICU 2.4
221
     */
222
    void reset(const UnicodeSet& set);
223
224
    /**
225
     * Resets this iterator to the start of the set.
226
     * @stable ICU 2.4
227
     */
228
    void reset();
229
230
    /**
231
     * ICU "poor man's RTTI", returns a UClassID for this class.
232
     *
233
     * @stable ICU 2.4
234
     */
235
    static UClassID U_EXPORT2 getStaticClassID();
236
237
    /**
238
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
239
     *
240
     * @stable ICU 2.4
241
     */
242
    virtual UClassID getDynamicClassID() const;
243
244
    // ======================= PRIVATES ===========================
245
246
 protected:
247
248
    // endElement and nextElement are really UChar32's, but we keep
249
    // them as signed int32_t's so we can do comparisons with
250
    // endElement set to -1.  Leave them as int32_t's.
251
    /** The set
252
     * @stable ICU 2.4
253
     */
254
    const UnicodeSet* set;
255
    /** End range
256
     * @stable ICU 2.4
257
     */
258
    int32_t endRange;
259
    /** Range
260
     * @stable ICU 2.4
261
     */
262
    int32_t range;
263
    /** End element
264
     * @stable ICU 2.4
265
     */
266
    int32_t endElement;
267
    /** Next element
268
     * @stable ICU 2.4
269
     */
270
    int32_t nextElement;
271
    //UBool abbreviated;
272
    /** Next string
273
     * @stable ICU 2.4
274
     */
275
    int32_t nextString;
276
    /** String count
277
     * @stable ICU 2.4
278
     */
279
    int32_t stringCount;
280
281
    /**
282
     *  Points to the string to use when the caller asks for a
283
     *  string and the current iteration item is a code point, not a string.
284
     *  @internal
285
     */
286
    UnicodeString *cpString;
287
288
    /** Copy constructor. Disallowed.
289
     * @stable ICU 2.4
290
     */
291
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
292
293
    /** Assignment operator. Disallowed.
294
     * @stable ICU 2.4
295
     */
296
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
297
298
    /** Load range
299
     * @stable ICU 2.4
300
     */
301
    virtual void loadRange(int32_t range);
302
303
};
304
305
0
inline UBool UnicodeSetIterator::isString() const {
306
0
    return codepoint == (UChar32)IS_STRING;
307
0
}
308
309
0
inline UChar32 UnicodeSetIterator::getCodepoint() const {
310
0
    return codepoint;
311
0
}
312
313
0
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
314
0
    return codepointEnd;
315
0
}
316
317
318
U_NAMESPACE_END
319
320
#endif