/src/icu/source/common/ruleiter.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | **********************************************************************  | 
5  |  | * Copyright (c) 2003-2011, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | **********************************************************************  | 
8  |  | * Author: Alan Liu  | 
9  |  | * Created: September 24 2003  | 
10  |  | * Since: ICU 2.8  | 
11  |  | **********************************************************************  | 
12  |  | */  | 
13  |  | #include "ruleiter.h"  | 
14  |  | #include "unicode/parsepos.h"  | 
15  |  | #include "unicode/symtable.h"  | 
16  |  | #include "unicode/unistr.h"  | 
17  |  | #include "unicode/utf16.h"  | 
18  |  | #include "patternprops.h"  | 
19  |  |  | 
20  |  | /* Maximum lookahead length that next() hands to unescapeAt() when it parses a backslash escape; sized for forms such as \U87654321 or \ud800\udc00 */  | 
21  | 0  | #define MAX_U_NOTATION_LEN 12  | 
22  |  |  | 
23  |  | U_NAMESPACE_BEGIN  | 
24  |  |  | 
25  |  | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,  | 
26  |  |                       ParsePosition& thePos) :  | 
27  | 0  |     text(theText),  | 
28  | 0  |     pos(thePos),  | 
29  | 0  |     sym(theSym),  | 
30  | 0  |     buf(0),  | 
31  | 0  |     bufPos(0)  | 
32  | 0  | {} | 
33  |  |  | 
34  | 0  | UBool RuleCharacterIterator::atEnd() const { | 
35  | 0  |     return buf == 0 && pos.getIndex() == text.length();  | 
36  | 0  | }  | 
37  |  |  | 
38  | 0  | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | 
39  | 0  |     if (U_FAILURE(ec)) return DONE;  | 
40  |  |  | 
41  | 0  |     UChar32 c = DONE;  | 
42  | 0  |     isEscaped = FALSE;  | 
43  |  | 
  | 
44  | 0  |     for (;;) { | 
45  | 0  |         c = _current();  | 
46  | 0  |         _advance(U16_LENGTH(c));  | 
47  |  | 
  | 
48  | 0  |         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&  | 
49  | 0  |             (options & PARSE_VARIABLES) != 0 && sym != 0) { | 
50  | 0  |             UnicodeString name = sym->parseReference(text, pos, text.length());  | 
51  |  |             // If name is empty there was an isolated SYMBOL_REF;  | 
52  |  |             // return it.  Caller must be prepared for this.  | 
53  | 0  |             if (name.length() == 0) { | 
54  | 0  |                 break;  | 
55  | 0  |             }  | 
56  | 0  |             bufPos = 0;  | 
57  | 0  |             buf = sym->lookup(name);  | 
58  | 0  |             if (buf == 0) { | 
59  | 0  |                 ec = U_UNDEFINED_VARIABLE;  | 
60  | 0  |                 return DONE;  | 
61  | 0  |             }  | 
62  |  |             // Handle empty variable value  | 
63  | 0  |             if (buf->length() == 0) { | 
64  | 0  |                 buf = 0;  | 
65  | 0  |             }  | 
66  | 0  |             continue;  | 
67  | 0  |         }  | 
68  |  |  | 
69  | 0  |         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { | 
70  | 0  |             continue;  | 
71  | 0  |         }  | 
72  |  |  | 
73  | 0  |         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | 
74  | 0  |             UnicodeString tempEscape;  | 
75  | 0  |             int32_t offset = 0;  | 
76  | 0  |             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);  | 
77  | 0  |             jumpahead(offset);  | 
78  | 0  |             isEscaped = TRUE;  | 
79  | 0  |             if (c < 0) { | 
80  | 0  |                 ec = U_MALFORMED_UNICODE_ESCAPE;  | 
81  | 0  |                 return DONE;  | 
82  | 0  |             }  | 
83  | 0  |         }  | 
84  |  |  | 
85  | 0  |         break;  | 
86  | 0  |     }  | 
87  |  |  | 
88  | 0  |     return c;  | 
89  | 0  | }  | 
90  |  |  | 
91  | 0  | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | 
92  | 0  |     p.buf = buf;  | 
93  | 0  |     p.pos = pos.getIndex();  | 
94  | 0  |     p.bufPos = bufPos;  | 
95  | 0  | }  | 
96  |  |  | 
97  | 0  | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | 
98  | 0  |     buf = p.buf;  | 
99  | 0  |     pos.setIndex(p.pos);  | 
100  | 0  |     bufPos = p.bufPos;  | 
101  | 0  | }  | 
102  |  |  | 
103  | 0  | void RuleCharacterIterator::skipIgnored(int32_t options) { | 
104  | 0  |     if ((options & SKIP_WHITESPACE) != 0) { | 
105  | 0  |         for (;;) { | 
106  | 0  |             UChar32 a = _current();  | 
107  | 0  |             if (!PatternProps::isWhiteSpace(a)) break;  | 
108  | 0  |             _advance(U16_LENGTH(a));  | 
109  | 0  |         }  | 
110  | 0  |     }  | 
111  | 0  | }  | 
112  |  |  | 
113  | 0  | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { | 
114  | 0  |     if (maxLookAhead < 0) { | 
115  | 0  |         maxLookAhead = 0x7FFFFFFF;  | 
116  | 0  |     }  | 
117  | 0  |     if (buf != 0) { | 
118  | 0  |         buf->extract(bufPos, maxLookAhead, result);  | 
119  | 0  |     } else { | 
120  | 0  |         text.extract(pos.getIndex(), maxLookAhead, result);  | 
121  | 0  |     }  | 
122  | 0  |     return result;  | 
123  | 0  | }  | 
124  |  |  | 
125  | 0  | void RuleCharacterIterator::jumpahead(int32_t count) { | 
126  | 0  |     _advance(count);  | 
127  | 0  | }  | 
128  |  |  | 
129  |  | /*  | 
130  |  | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { | 
131  |  |     int32_t b = pos.getIndex();  | 
132  |  |     text.extract(0, b, result);  | 
133  |  |     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index  | 
134  |  | }  | 
135  |  | */  | 
136  |  |  | 
137  | 0  | UChar32 RuleCharacterIterator::_current() const { | 
138  | 0  |     if (buf != 0) { | 
139  | 0  |         return buf->char32At(bufPos);  | 
140  | 0  |     } else { | 
141  | 0  |         int i = pos.getIndex();  | 
142  | 0  |         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;  | 
143  | 0  |     }  | 
144  | 0  | }  | 
145  |  |  | 
146  | 0  | void RuleCharacterIterator::_advance(int32_t count) { | 
147  | 0  |     if (buf != 0) { | 
148  | 0  |         bufPos += count;  | 
149  | 0  |         if (bufPos == buf->length()) { | 
150  | 0  |             buf = 0;  | 
151  | 0  |         }  | 
152  | 0  |     } else { | 
153  | 0  |         pos.setIndex(pos.getIndex() + count);  | 
154  | 0  |         if (pos.getIndex() > text.length()) { | 
155  | 0  |             pos.setIndex(text.length());  | 
156  | 0  |         }  | 
157  | 0  |     }  | 
158  | 0  | }  | 
159  |  |  | 
160  |  | U_NAMESPACE_END  | 
161  |  |  | 
162  |  | //eof  |