/src/icu/source/common/ruleiter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (c) 2003-2011, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | * Author: Alan Liu |
9 | | * Created: September 24 2003 |
10 | | * Since: ICU 2.8 |
11 | | ********************************************************************** |
12 | | */ |
13 | | #include "ruleiter.h" |
14 | | #include "unicode/parsepos.h" |
15 | | #include "unicode/symtable.h" |
16 | | #include "unicode/unistr.h" |
17 | | #include "unicode/utf16.h" |
18 | | #include "patternprops.h" |
19 | | |
/*
 * Upper bound, in UTF-16 code units, on the text handed to the escape
 * parser after a backslash has been consumed.  It is sized for the longest
 * notations such as \U87654321 or \ud800\udc00 (the count of 12 includes
 * the leading backslash, so it is a safe over-estimate).
 *
 * A typed file-local constant is used instead of a preprocessor macro so
 * the value has a type and obeys normal scoping rules.
 */
static const int32_t MAX_U_NOTATION_LEN = 12;
22 | | |
23 | | U_NAMESPACE_BEGIN |
24 | | |
25 | | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, |
26 | | ParsePosition& thePos) : |
27 | 0 | text(theText), |
28 | 0 | pos(thePos), |
29 | 0 | sym(theSym), |
30 | 0 | buf(0), |
31 | 0 | bufPos(0) |
32 | 0 | {} |
33 | | |
34 | 0 | UBool RuleCharacterIterator::atEnd() const { |
35 | 0 | return buf == 0 && pos.getIndex() == text.length(); |
36 | 0 | } |
37 | | |
38 | 0 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { |
39 | 0 | if (U_FAILURE(ec)) return DONE; |
40 | | |
41 | 0 | UChar32 c = DONE; |
42 | 0 | isEscaped = FALSE; |
43 | |
|
44 | 0 | for (;;) { |
45 | 0 | c = _current(); |
46 | 0 | _advance(U16_LENGTH(c)); |
47 | |
|
48 | 0 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && |
49 | 0 | (options & PARSE_VARIABLES) != 0 && sym != 0) { |
50 | 0 | UnicodeString name = sym->parseReference(text, pos, text.length()); |
51 | | // If name is empty there was an isolated SYMBOL_REF; |
52 | | // return it. Caller must be prepared for this. |
53 | 0 | if (name.length() == 0) { |
54 | 0 | break; |
55 | 0 | } |
56 | 0 | bufPos = 0; |
57 | 0 | buf = sym->lookup(name); |
58 | 0 | if (buf == 0) { |
59 | 0 | ec = U_UNDEFINED_VARIABLE; |
60 | 0 | return DONE; |
61 | 0 | } |
62 | | // Handle empty variable value |
63 | 0 | if (buf->length() == 0) { |
64 | 0 | buf = 0; |
65 | 0 | } |
66 | 0 | continue; |
67 | 0 | } |
68 | | |
69 | 0 | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { |
70 | 0 | continue; |
71 | 0 | } |
72 | | |
73 | 0 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { |
74 | 0 | UnicodeString tempEscape; |
75 | 0 | int32_t offset = 0; |
76 | 0 | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
77 | 0 | jumpahead(offset); |
78 | 0 | isEscaped = TRUE; |
79 | 0 | if (c < 0) { |
80 | 0 | ec = U_MALFORMED_UNICODE_ESCAPE; |
81 | 0 | return DONE; |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | 0 | break; |
86 | 0 | } |
87 | | |
88 | 0 | return c; |
89 | 0 | } |
90 | | |
91 | 0 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { |
92 | 0 | p.buf = buf; |
93 | 0 | p.pos = pos.getIndex(); |
94 | 0 | p.bufPos = bufPos; |
95 | 0 | } |
96 | | |
97 | 0 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { |
98 | 0 | buf = p.buf; |
99 | 0 | pos.setIndex(p.pos); |
100 | 0 | bufPos = p.bufPos; |
101 | 0 | } |
102 | | |
103 | 0 | void RuleCharacterIterator::skipIgnored(int32_t options) { |
104 | 0 | if ((options & SKIP_WHITESPACE) != 0) { |
105 | 0 | for (;;) { |
106 | 0 | UChar32 a = _current(); |
107 | 0 | if (!PatternProps::isWhiteSpace(a)) break; |
108 | 0 | _advance(U16_LENGTH(a)); |
109 | 0 | } |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | 0 | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { |
114 | 0 | if (maxLookAhead < 0) { |
115 | 0 | maxLookAhead = 0x7FFFFFFF; |
116 | 0 | } |
117 | 0 | if (buf != 0) { |
118 | 0 | buf->extract(bufPos, maxLookAhead, result); |
119 | 0 | } else { |
120 | 0 | text.extract(pos.getIndex(), maxLookAhead, result); |
121 | 0 | } |
122 | 0 | return result; |
123 | 0 | } |
124 | | |
125 | 0 | void RuleCharacterIterator::jumpahead(int32_t count) { |
126 | 0 | _advance(count); |
127 | 0 | } |
128 | | |
129 | | /* |
130 | | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
131 | | int32_t b = pos.getIndex(); |
132 | | text.extract(0, b, result); |
133 | | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
134 | | } |
135 | | */ |
136 | | |
137 | 0 | UChar32 RuleCharacterIterator::_current() const { |
138 | 0 | if (buf != 0) { |
139 | 0 | return buf->char32At(bufPos); |
140 | 0 | } else { |
141 | 0 | int i = pos.getIndex(); |
142 | 0 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; |
143 | 0 | } |
144 | 0 | } |
145 | | |
146 | 0 | void RuleCharacterIterator::_advance(int32_t count) { |
147 | 0 | if (buf != 0) { |
148 | 0 | bufPos += count; |
149 | 0 | if (bufPos == buf->length()) { |
150 | 0 | buf = 0; |
151 | 0 | } |
152 | 0 | } else { |
153 | 0 | pos.setIndex(pos.getIndex() + count); |
154 | 0 | if (pos.getIndex() > text.length()) { |
155 | 0 | pos.setIndex(text.length()); |
156 | 0 | } |
157 | 0 | } |
158 | 0 | } |
159 | | |
160 | | U_NAMESPACE_END |
161 | | |
162 | | //eof |