Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/i18n/unicode/coleitr.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 ******************************************************************************
5
 *   Copyright (C) 1997-2014, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 ******************************************************************************
8
 */
9
10
/**
11
 * \file 
12
 * \brief C++ API: Collation Element Iterator.
13
 */
14
15
/**
16
* File coleitr.h
17
*
18
* Created by: Helena Shih
19
*
20
* Modification History:
21
*
22
*  Date       Name        Description
23
*
24
*  8/18/97    helena      Added internal API documentation.
25
* 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
26
* 12/10/99    aliu        Ported Thai collation support from Java.
27
* 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
28
* 02/19/01    swquek      Removed CollationElementsIterator() since it is 
29
*                         private constructor and no calls are made to it
30
* 2012-2014   markus      Rewritten in C++ again.
31
*/
32
33
#ifndef COLEITR_H
34
#define COLEITR_H
35
36
#include "unicode/utypes.h"
37
38
#if U_SHOW_CPLUSPLUS_API
39
40
#if !UCONFIG_NO_COLLATION
41
42
#include "unicode/unistr.h"
43
#include "unicode/uobject.h"
44
45
struct UCollationElements;
46
struct UHashtable;
47
48
U_NAMESPACE_BEGIN
49
50
struct CollationData;
51
52
class CharacterIterator;
53
class CollationIterator;
54
class RuleBasedCollator;
55
class UCollationPCE;
56
class UVector32;
57
58
/**
59
* The CollationElementIterator class is used as an iterator to walk through     
60
* each character of an international string. Use the iterator to return the
61
* ordering priority of the positioned character. The ordering priority of a 
62
* character, which we refer to as a key, defines how a character is collated in 
63
* the given collation object.
64
* For example, consider the following in Slovak and in traditional Spanish collation:
65
* <pre>
66
*        "ca" -> the first key is key('c') and second key is key('a').
67
*        "cha" -> the first key is key('ch') and second key is key('a').</pre>
68
* And in German phonebook collation,
69
* <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
70
*        the third key is key('b'). \endhtmlonly </pre>
71
* The key of a character is an integer composed of primary order(short),
72
* secondary order(char), and tertiary order(char). Java strictly defines the 
73
* size and signedness of its primitive data types. Therefore, the static
74
* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
75
* int32_t to ensure the correctness of the key value.
76
* <p>Example of the iterator usage: (without error checking)
77
* <pre>
78
* \code
79
*   void CollationElementIterator_Example()
80
*   {
81
*       UnicodeString str = "This is a test";
82
*       UErrorCode success = U_ZERO_ERROR;
83
*       RuleBasedCollator* rbc =
84
*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
85
*       CollationElementIterator* c =
86
*           rbc->createCollationElementIterator( str );
87
*       int32_t order = c->next(success);
88
*       c->reset();
89
*       order = c->previous(success);
90
*       delete c;
91
*       delete rbc;
92
*   }
93
* \endcode
94
* </pre>
95
* <p>
96
* The method next() returns the collation order of the next character based on
97
* the comparison level of the collator. The method previous() returns the
98
* collation order of the previous character based on the comparison level of
99
* the collator. The Collation Element Iterator moves only in one direction
100
* between calls to reset(), setOffset(), or setText(). That is, next() 
101
* and previous() cannot be interleaved. Whenever previous() is to be called after 
102
* next() or vice versa, reset(), setOffset() or setText() has to be called first
103
* to reset the status, shifting pointers to either the end or the start of
104
* the string (reset() or setText()), or the specified position (setOffset()).
105
* Hence at the next call of next() or previous(), the first or last collation order,
106
* or collation order at the specified position will be returned. If a change of
107
* direction is done without one of these calls, the result is undefined.
108
* <p>
109
* The result of a forward iterate (next()) and reversed result of the backward
110
* iterate (previous()) on the same string are equivalent, if collation orders
111
* with the value 0 are ignored.
112
* The collation order is based on the comparison level of the collator.  A collation order 
113
* consists of primary order, secondary order and tertiary order.  The data 
114
* type of the collation order is <strong>int32_t</strong>. 
115
*
116
* Note, CollationElementIterator should not be subclassed.
117
* @see     Collator
118
* @see     RuleBasedCollator
119
* @version 1.8 Jan 16 2001
120
*/
121
class U_I18N_API CollationElementIterator U_FINAL : public UObject {
122
public: 
123
124
    // CollationElementIterator public data member ------------------------------
125
126
    enum {
127
        /**
128
         * NULLORDER is the value returned by next() and previous() when an
         * error has occurred or when the end (for next()) or the start
         * (for previous()) of the string has been reached.
129
         * @stable ICU 2.0
130
         */
131
        NULLORDER = (int32_t)0xffffffff
132
    };
133
134
    // CollationElementIterator public constructor/destructor -------------------
135
136
    /**
137
    * Copy constructor.
138
    *
139
    * @param other    the object to be copied from
140
    * @stable ICU 2.0
141
    */
142
    CollationElementIterator(const CollationElementIterator& other);
143
144
    /**
145
    * Destructor
146
    * @stable ICU 2.0
147
    */
148
    virtual ~CollationElementIterator();
149
150
    // CollationElementIterator public methods ----------------------------------
151
152
    /**
153
    * Returns true if "other" is the same as "this"
154
    *
155
    * @param other    the object to be compared
156
    * @return         true if "other" is the same as "this"
157
    * @stable ICU 2.0
158
    */
159
    bool operator==(const CollationElementIterator& other) const;
160
161
    /**
162
    * Returns true if "other" is not the same as "this".
163
    *
164
    * @param other    the object to be compared
165
    * @return         true if "other" is not the same as "this"
166
    * @stable ICU 2.0
167
    */
168
    bool operator!=(const CollationElementIterator& other) const;
169
170
    /**
171
    * Resets the cursor to the beginning of the string.
    * Per the documentation of the private dir_ member, a call to previous()
    * made directly after reset() begins from the end of the string.
172
    * @stable ICU 2.0
173
    */
174
    void reset(void);
175
176
    /**
177
    * Gets the ordering priority of the next character in the string.
178
    * @param status the error code status.
179
    * @return the next character's ordering, or NULLORDER if an
180
    *         error has occurred or if the end of string has been reached
181
    * @stable ICU 2.0
182
    */
183
    int32_t next(UErrorCode& status);
184
185
    /**
186
    * Get the ordering priority of the previous collation element in the string.
187
    * @param status the error code status.
188
    * @return the previous element's ordering, or NULLORDER if an
189
    *         error has occurred or if the start of string has been reached
190
    * @stable ICU 2.0
191
    */
192
    int32_t previous(UErrorCode& status);
193
194
    /**
195
    * Gets the primary order of a collation order.
196
    * @param order the collation order
197
    * @return the primary order of a collation order.
198
    * @stable ICU 2.0
199
    */
200
    static inline int32_t primaryOrder(int32_t order);
201
202
    /**
203
    * Gets the secondary order of a collation order.
204
    * @param order the collation order
205
    * @return the secondary order of a collation order.
206
    * @stable ICU 2.0
207
    */
208
    static inline int32_t secondaryOrder(int32_t order);
209
210
    /**
211
    * Gets the tertiary order of a collation order.
212
    * @param order the collation order
213
    * @return the tertiary order of a collation order.
214
    * @stable ICU 2.0
215
    */
216
    static inline int32_t tertiaryOrder(int32_t order);
217
218
    /**
219
    * Return the maximum length of any expansion sequences that end with the
220
    * specified comparison order.
221
    * @param order a collation order returned by previous or next.
222
    * @return maximum size of the expansion sequences ending with the collation
223
    *         element or 1 if collation element does not occur at the end of any
224
    *         expansion sequence
225
    * @stable ICU 2.0
226
    */
227
    int32_t getMaxExpansion(int32_t order) const;
228
229
    /**
230
    * Gets the comparison order in the desired strength. Ignore the other
231
    * differences.
232
    * @param order The order value
    * @return the comparison order in the desired strength
233
    * @stable ICU 2.0
234
    */
235
    int32_t strengthOrder(int32_t order) const;
236
237
    /**
238
    * Sets the source string.
239
    * @param str the source string.
240
    * @param status the error code status.
241
    * @stable ICU 2.0
242
    */
243
    void setText(const UnicodeString& str, UErrorCode& status);
244
245
    /**
246
    * Sets the source text from a character iterator.
247
    * @param str the source character iterator.
248
    * @param status the error code status.
249
    * @stable ICU 2.0
250
    */
251
    void setText(CharacterIterator& str, UErrorCode& status);
252
253
    /**
254
    * Checks if a comparison order is ignorable.
    * The inline definition tests whether the upper 16 bits of the order
    * (the bits that primaryOrder() extracts) are all zero.
255
    * @param order the collation order.
256
    * @return true if a character is ignorable, false otherwise.
257
    * @stable ICU 2.0
258
    */
259
    static inline UBool isIgnorable(int32_t order);
260
261
    /**
262
    * Gets the offset of the currently processed character in the source string.
263
    * @return the offset of the character.
264
    * @stable ICU 2.0
265
    */
266
    int32_t getOffset(void) const;
267
268
    /**
269
    * Sets the offset of the currently processed character in the source string.
270
    * @param newOffset the new offset.
271
    * @param status the error code status.
272
    *
273
    * @stable ICU 2.0
274
    */
275
    void setOffset(int32_t newOffset, UErrorCode& status);
276
277
    /**
278
    * ICU "poor man's RTTI", returns a UClassID for the actual class.
279
    *
280
    * @stable ICU 2.2
281
    */
282
    virtual UClassID getDynamicClassID() const;
283
284
    /**
285
    * ICU "poor man's RTTI", returns a UClassID for this class.
286
    *
287
    * @stable ICU 2.2
288
    */
289
    static UClassID U_EXPORT2 getStaticClassID();
290
291
#ifndef U_HIDE_INTERNAL_API
292
    /**
     * Reinterprets a UCollationElements pointer as a pointer to this class.
     * This is a plain reinterpret_cast: no object is created or converted.
     * @internal
     */
293
0
    static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) {
294
0
        return reinterpret_cast<CollationElementIterator *>(uc);
295
0
    }
296
    /**
     * Const overload: reinterprets a const UCollationElements pointer as a
     * const pointer to this class via reinterpret_cast.
     * @internal
     */
297
0
    static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) {
298
0
        return reinterpret_cast<const CollationElementIterator *>(uc);
299
0
    }
300
    /**
     * Reinterprets this object as a UCollationElements pointer via
     * reinterpret_cast (the inverse of fromUCollationElements()).
     * @internal
     */
301
0
    inline UCollationElements *toUCollationElements() {
302
0
        return reinterpret_cast<UCollationElements *>(this);
303
0
    }
304
    /**
     * Const overload: reinterprets this object as a const UCollationElements
     * pointer via reinterpret_cast.
     * @internal
     */
305
0
    inline const UCollationElements *toUCollationElements() const {
306
0
        return reinterpret_cast<const UCollationElements *>(this);
307
0
    }
308
#endif  // U_HIDE_INTERNAL_API
309
310
private:
311
    friend class RuleBasedCollator;
312
    friend class UCollationPCE;
313
314
    /**
315
    * CollationElementIterator constructor. This takes the source string and the
316
    * collation object. The cursor will walk through the source string based on the
317
    * predefined collation rules. If the source string is empty, NULLORDER will
318
    * be returned on the calls to next().
319
    * @param sourceText    the source string.
320
    * @param order         the collation object.
321
    * @param status        the error code status.
322
    */
323
    CollationElementIterator(const UnicodeString& sourceText,
324
        const RuleBasedCollator* order, UErrorCode& status);
325
    // Note: The constructors should take settings & tailoring, not a collator,
326
    // to avoid circular dependencies.
327
    // However, for operator==() we would need to be able to compare tailoring data for equality
328
    // without making CollationData or CollationTailoring depend on TailoredSet.
329
    // (See the implementation of RuleBasedCollator::operator==().)
330
    // That might require creating an intermediate class that would be used
331
    // by both CollationElementIterator and RuleBasedCollator
332
    // but only contain the part of RBC== related to data and rules.
333
334
    /**
335
    * CollationElementIterator constructor. This takes the source text as a
336
    * character iterator and the collation object.  The cursor will walk through the
337
    * source text based on the predefined collation rules.  If the source text is
338
    * empty, NULLORDER will be returned on the calls to next().
339
    * @param sourceText    the source character iterator.
340
    * @param order         the collation object.
341
    * @param status        the error code status.
342
    */
343
    CollationElementIterator(const CharacterIterator& sourceText,
344
        const RuleBasedCollator* order, UErrorCode& status);
345
346
    /**
347
    * Assignment operator (declared private, so only this class and its
    * friends can assign).
348
    *
349
    * @param other    the object to be copied
350
    */
351
    const CollationElementIterator&
352
        operator=(const CollationElementIterator& other);
353
354
    CollationElementIterator(); // default constructor not implemented
355
356
    /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
357
0
    inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
358
359
    // NOTE(review): presumably builds the table that the static
    // getMaxExpansion() overload below consults; confirm in the implementation.
    static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);
360
361
    // NOTE(review): presumably the table-lookup counterpart of the public
    // getMaxExpansion(int32_t); confirm in the implementation.
    static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);
362
363
    // CollationElementIterator private data members ----------------------------
364
365
    CollationIterator *iter_;  // owned
366
    const RuleBasedCollator *rbc_;  // aliased
367
    // NOTE(review): meaning not visible in this header; presumably a pending
    // second half of a collation element. Confirm in the implementation.
    uint32_t otherHalf_;
368
    /**
369
     * <0: backwards; 0: just after reset() (previous() begins from end);
370
     * 1: just after setOffset(); >1: forward
371
     */
372
    int8_t dir_;
373
    /**
374
     * Stores offsets from expansions and from unsafe-backwards iteration,
375
     * so that getOffset() returns intermediate offsets for the CEs
376
     * that are consistent with forward iteration.
377
     */
378
    UVector32 *offsets_;
379
380
    // NOTE(review): presumably the iterator's own copy of the source text; confirm.
    UnicodeString string_;
381
};
382
383
// CollationElementIterator inline method definitions --------------------------
384
385
// Returns the primary order: the upper 16 bits of the 32-bit collation order,
// moved down into the low 16 bits of the result.
inline int32_t CollationElementIterator::primaryOrder(int32_t order)
386
0
{
387
0
    // The 0xffff mask keeps only the low 16 bits of the shifted value, so any
    // sign extension from shifting a negative order is discarded.
    return (order >> 16) & 0xffff;
388
0
}
389
390
// Returns the secondary order: bits 8..15 of the 32-bit collation order,
// moved down into the low 8 bits of the result.
inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
391
0
{
392
0
    // Shift out the low byte, then mask to a single byte.
    return (order >> 8) & 0xff;
393
0
}
394
395
// Returns the tertiary order: the low 8 bits of the 32-bit collation order.
inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
396
0
{
397
0
    return order & 0xff;
398
0
}
399
400
// Returns true when the upper 16 bits of the collation order are all zero,
// i.e. when primaryOrder(order) would be 0; the low 16 bits are not examined.
inline UBool CollationElementIterator::isIgnorable(int32_t order)
401
0
{
402
0
    return (order & 0xffff0000) == 0;
403
0
}
404
405
U_NAMESPACE_END
406
407
#endif /* #if !UCONFIG_NO_COLLATION */
408
409
#endif /* U_SHOW_CPLUSPLUS_API */
410
411
#endif