/src/icu/icu4c/source/common/usetiter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (c) 2002-2006, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | */ |
9 | | #include "unicode/usetiter.h" |
10 | | #include "unicode/uniset.h" |
11 | | #include "unicode/unistr.h" |
12 | | #include "uvector.h" |
13 | | |
14 | | U_NAMESPACE_BEGIN |
15 | | |
// Expands ICU's RTTI boilerplate for UnicodeSetIterator.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() definitions declared in the class header - confirm
// against uobject.h.
16 | | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) |
17 | | |
18 | | /** |
19 | | * Create an iterator |
20 | | * @param set set to iterate over |
21 | | */ |
22 | 811k | UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { |
23 | 811k | cpString = nullptr; |
24 | 811k | reset(uSet); |
25 | 811k | } |
26 | | |
27 | | /** |
28 | | * Create an iterator. Convenience for when the contents are to be set later. |
29 | | */ |
30 | 0 | UnicodeSetIterator::UnicodeSetIterator() { |
31 | 0 | this->set = nullptr; |
32 | 0 | cpString = nullptr; |
33 | 0 | reset(); |
34 | 0 | } |
35 | | |
36 | 811k | UnicodeSetIterator::~UnicodeSetIterator() { |
37 | 811k | delete cpString; |
38 | 811k | } |
39 | | |
40 | | /** |
41 | | * Returns the next element in the set. |
42 | | * @return true if there was another element in the set. |
43 | | * if so, if codepoint == IS_STRING, the value is a string in the string field |
44 | | * else the value is a single code point in the codepoint field. |
45 | | * <br>You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order, |
46 | | * and that all code points are returned before any strings are returned. |
47 | | * <br>Note also that the codepointEnd is undefined after calling this method. |
48 | | */ |
49 | 16.2M | UBool UnicodeSetIterator::next() { |
50 | 16.2M | if (nextElement <= endElement) { |
51 | 7.39M | codepoint = codepointEnd = nextElement++; |
52 | 7.39M | string = nullptr; |
53 | 7.39M | return true; |
54 | 7.39M | } |
55 | 8.90M | if (range < endRange) { |
56 | 8.09M | loadRange(++range); |
57 | 8.09M | codepoint = codepointEnd = nextElement++; |
58 | 8.09M | string = nullptr; |
59 | 8.09M | return true; |
60 | 8.09M | } |
61 | | |
62 | 811k | if (nextString >= stringCount) return false; |
63 | 6 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
64 | 6 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
65 | 6 | return true; |
66 | 811k | } |
67 | | |
68 | | /** |
69 | | * @return true if there was another element in the set. |
70 | | * if so, if codepoint == IS_STRING, the value is a string in the string field |
71 | | * else the value is a range of codepoints in the <codepoint, codepointEnd> fields. |
72 | | * <br>Note that the codepoints are in sorted order, and the strings are in sorted order, |
73 | | * and that all code points are returned before any strings are returned. |
74 | | * <br>You are guaranteed that the ranges are in sorted order, and the strings are in sorted order, |
75 | | * and that all ranges are returned before any strings are returned. |
76 | | * <br>You are also guaranteed that ranges are disjoint and non-contiguous. |
77 | | * <br>Note also that the codepointEnd is undefined after calling this method. |
78 | | */ |
79 | 0 | UBool UnicodeSetIterator::nextRange() { |
80 | 0 | string = nullptr; |
81 | 0 | if (nextElement <= endElement) { |
82 | 0 | codepointEnd = endElement; |
83 | 0 | codepoint = nextElement; |
84 | 0 | nextElement = endElement+1; |
85 | 0 | return true; |
86 | 0 | } |
87 | 0 | if (range < endRange) { |
88 | 0 | loadRange(++range); |
89 | 0 | codepointEnd = endElement; |
90 | 0 | codepoint = nextElement; |
91 | 0 | nextElement = endElement+1; |
92 | 0 | return true; |
93 | 0 | } |
94 | | |
95 | 0 | if (nextString >= stringCount) return false; |
96 | 0 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
97 | 0 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
98 | 0 | return true; |
99 | 0 | } |
100 | | |
101 | | /** |
102 | | *@param set the set to iterate over. This allows reuse of the iterator. |
103 | | */ |
104 | 811k | void UnicodeSetIterator::reset(const UnicodeSet& uSet) { |
105 | 811k | this->set = &uSet; |
106 | 811k | reset(); |
107 | 811k | } |
108 | | |
109 | | /** |
110 | | * Resets to the start, to allow the iteration to start over again. |
111 | | */ |
112 | 811k | void UnicodeSetIterator::reset() { |
113 | 811k | if (set == nullptr) { |
114 | | // Set up indices to empty iteration |
115 | 0 | endRange = -1; |
116 | 0 | stringCount = 0; |
117 | 811k | } else { |
118 | 811k | endRange = set->getRangeCount() - 1; |
119 | 811k | stringCount = set->stringsSize(); |
120 | 811k | } |
121 | 811k | range = 0; |
122 | 811k | endElement = -1; |
123 | 811k | nextElement = 0; |
124 | 811k | if (endRange >= 0) { |
125 | 811k | loadRange(range); |
126 | 811k | } |
127 | 811k | nextString = 0; |
128 | 811k | string = nullptr; |
129 | 811k | } |
130 | | |
131 | 8.90M | void UnicodeSetIterator::loadRange(int32_t iRange) { |
132 | 8.90M | nextElement = set->getRangeStart(iRange); |
133 | 8.90M | endElement = set->getRangeEnd(iRange); |
134 | 8.90M | } |
135 | | |
136 | | |
137 | 1.20M | const UnicodeString& UnicodeSetIterator::getString() { |
138 | 1.20M | if (string==nullptr && codepoint!=(UChar32)IS_STRING) { |
139 | 1.20M | if (cpString == nullptr) { |
140 | 583 | cpString = new UnicodeString(); |
141 | 583 | } |
142 | 1.20M | if (cpString != nullptr) { |
143 | 1.20M | cpString->setTo((UChar32)codepoint); |
144 | 1.20M | } |
145 | 1.20M | string = cpString; |
146 | 1.20M | } |
147 | 1.20M | return *string; |
148 | 1.20M | } |
149 | | |
150 | | U_NAMESPACE_END |
151 | | |
152 | | //eof |