/src/icu/source/common/ruleiter.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | **********************************************************************  | 
5  |  | * Copyright (c) 2003-2011, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | **********************************************************************  | 
8  |  | * Author: Alan Liu  | 
9  |  | * Created: September 24 2003  | 
10  |  | * Since: ICU 2.8  | 
11  |  | **********************************************************************  | 
12  |  | */  | 
13  |  | #ifndef _RULEITER_H_  | 
14  |  | #define _RULEITER_H_  | 
15  |  |  | 
16  |  | #include "unicode/uobject.h"  | 
17  |  |  | 
18  |  | U_NAMESPACE_BEGIN  | 
19  |  |  | 
20  |  | class UnicodeString;  | 
21  |  | class ParsePosition;  | 
22  |  | class SymbolTable;  | 
23  |  |  | 
24  |  | /**  | 
25  |  |  * An iterator that returns 32-bit code points.  This class is deliberately  | 
26  |  |  * <em>not</em> related to any of the ICU character iterator classes  | 
27  |  |  * in order to minimize complexity.  | 
28  |  |  * @author Alan Liu  | 
29  |  |  * @since ICU 2.8  | 
30  |  |  */  | 
31  |  | class RuleCharacterIterator : public UMemory { | 
32  |  |  | 
33  |  |     // TODO: Ideas for later.  (Do not implement if not needed, lest the  | 
34  |  |     // code coverage numbers go down due to unused methods.)  | 
35  |  |     // 1. Add a copy constructor, operator==() method.  | 
36  |  |     // 2. Rather than return DONE, throw an exception if the end  | 
37  |  |     // is reached -- this is an alternate usage model, probably not useful.  | 
38  |  |  | 
39  |  | private:  | 
40  |  |     /**  | 
41  |  |      * Text being iterated.  Held by reference, not copied.  | 
42  |  |      */      | 
43  |  |     const UnicodeString& text;  | 
44  |  |  | 
45  |  |     /**  | 
46  |  |      * Position of iterator.  Held by reference (a ParsePosition&).  | 
47  |  |      */  | 
48  |  |     ParsePosition& pos;  | 
49  |  |  | 
50  |  |     /**  | 
51  |  |      * Symbol table used to parse and dereference variables.  May be null (0).  | 
52  |  |      */  | 
53  |  |     const SymbolTable* sym;  | 
54  |  |       | 
55  |  |     /**  | 
56  |  |      * Current variable expansion, or null (0) if none.  | 
57  |  |      */  | 
58  |  |     const UnicodeString* buf;  | 
59  |  |  | 
60  |  |     /**  | 
61  |  |      * Position within buf.  Meaningless if buf == 0.  | 
62  |  |      */  | 
63  |  |     int32_t bufPos;  | 
64  |  |  | 
65  |  | public:  | 
66  |  |     /**  | 
67  |  |      * Value returned when there are no more characters to iterate.  | 
68  |  |      */  | 
69  |  |     enum { DONE = -1 }; | 
70  |  |  | 
71  |  |     /**  | 
72  |  |      * Bitmask option to enable parsing of variable names.  If (options &  | 
73  |  |      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to  | 
74  |  |      * its value.  Variables are parsed using the SymbolTable API.  | 
75  |  |      */  | 
76  |  |     enum { PARSE_VARIABLES = 1 }; | 
77  |  |  | 
78  |  |     /**  | 
79  |  |      * Bitmask option to enable parsing of escape sequences.  If (options &  | 
80  |  |      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded  | 
81  |  |      * to its value.  Escapes are parsed using Utility.unescapeAt().  | 
82  |  |      */  | 
83  |  |     enum { PARSE_ESCAPES   = 2 }; | 
84  |  |  | 
85  |  |     /**  | 
86  |  |      * Bitmask option to enable skipping of whitespace.  If (options &  | 
87  |  |      * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently  | 
88  |  |      * skipped, as if they were not present in the input.  | 
89  |  |      */  | 
90  |  |     enum { SKIP_WHITESPACE = 4 }; | 
91  |  |  | 
92  |  |     /**  | 
93  |  |      * Constructs an iterator over the given text, starting at the given  | 
94  |  |      * position.  | 
95  |  |      * @param text the text to be iterated  | 
96  |  |      * @param sym the symbol table, or null if there is none.  If sym is null,  | 
97  |  |      * then variables will not be dereferenced, even if the PARSE_VARIABLES  | 
98  |  |      * option is set.  | 
99  |  |      * @param pos upon input, the index of the next character to return.  If a  | 
100  |  |      * variable has been dereferenced, then pos will <em>not</em> increment as  | 
101  |  |      * characters of the variable value are iterated.  | 
102  |  |      */  | 
103  |  |     RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,  | 
104  |  |                           ParsePosition& pos);  | 
105  |  |       | 
106  |  |     /**  | 
107  |  |      * Returns true if this iterator has no more characters to return.  | 
108  |  |      */  | 
109  |  |     UBool atEnd() const;  | 
110  |  |  | 
111  |  |     /**  | 
112  |  |      * Returns the next character using the given options, or DONE if there  | 
113  |  |      * are no more characters, and advances the position to the next  | 
114  |  |      * character.  | 
115  |  |      * @param options one or more of the following options, bitwise-OR-ed  | 
116  |  |      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.  | 
117  |  |      * @param isEscaped output parameter set to true if the character  | 
118  |  |      * was escaped  | 
119  |  |      * @param ec input-output error code.  An error will only be set by  | 
120  |  |      * this routine if options includes PARSE_VARIABLES and an unknown  | 
121  |  |      * variable name is seen, or if options includes PARSE_ESCAPES and  | 
122  |  |      * an invalid escape sequence is seen.  | 
123  |  |      * @return the current 32-bit code point, or DONE  | 
124  |  |      */  | 
125  |  |     UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);  | 
126  |  |  | 
127  |  |     /**  | 
128  |  |      * Returns true if this iterator is currently within a variable expansion.  | 
129  |  |      */  | 
130  |  |     inline UBool inVariable() const;  | 
131  |  |  | 
132  |  |     /**  | 
133  |  |      * An opaque object representing the position of a RuleCharacterIterator.  All fields are private; only RuleCharacterIterator (a friend) can access them.  | 
134  |  |      */  | 
135  |  |     struct Pos : public UMemory { | 
136  |  |     private:  | 
137  |  |         const UnicodeString* buf;  | 
138  |  |         int32_t pos;  | 
139  |  |         int32_t bufPos;  | 
140  |  |         friend class RuleCharacterIterator;  | 
141  |  |     };  | 
142  |  |  | 
143  |  |     /**  | 
144  |  |      * Sets an object which, when later passed to setPos(), will  | 
145  |  |      * restore this iterator's position.  Usage idiom:  | 
146  |  |      *  | 
147  |  |      * RuleCharacterIterator iterator = ...;  | 
148  |  |      * RuleCharacterIterator::Pos pos;  | 
149  |  |      * iterator.getPos(pos);  | 
150  |  |      * for (;;) { | 
151  |  |      *   iterator.getPos(pos);  | 
152  |  |      *   int c = iterator.next(...);  | 
153  |  |      *   ...  | 
154  |  |      * }  | 
155  |  |      * iterator.setPos(pos);  | 
156  |  |      *  | 
157  |  |      * @param p a position object to be set to this iterator's  | 
158  |  |      * current position.  | 
159  |  |      */  | 
160  |  |     void getPos(Pos& p) const;  | 
161  |  |  | 
162  |  |     /**  | 
163  |  |      * Restores this iterator to the position it had when getPos()  | 
164  |  |      * set the given object.  | 
165  |  |      * @param p a position object previously set by getPos()  | 
166  |  |      */  | 
167  |  |     void setPos(const Pos& p);  | 
168  |  |  | 
169  |  |     /**  | 
170  |  |      * Skips ahead past any ignored characters, as indicated by the given  | 
171  |  |      * options.  This is useful in conjunction with the lookahead() method.  | 
172  |  |      *  | 
173  |  |      * Currently, this only has an effect for SKIP_WHITESPACE.  | 
174  |  |      * @param options one or more of the following options, bitwise-OR-ed  | 
175  |  |      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.  | 
176  |  |      */  | 
177  |  |     void skipIgnored(int32_t options);  | 
178  |  |  | 
179  |  |     /**  | 
180  |  |      * Returns a string containing the remainder of the characters to be  | 
181  |  |      * returned by this iterator, without any option processing.  If the  | 
182  |  |      * iterator is currently within a variable expansion, this will only  | 
183  |  |      * extend to the end of the variable expansion.  This method is provided  | 
184  |  |      * so that iterators may interoperate with string-based APIs.  The typical  | 
185  |  |      * sequence of calls is to call skipIgnored(), then call lookahead(), then  | 
186  |  |      * parse the string returned by lookahead(), then call jumpahead() to  | 
187  |  |      * resynchronize the iterator.  | 
188  |  |      * @param result a string to receive the characters to be returned  | 
189  |  |      * by future calls to next()  | 
190  |  |      * @param maxLookAhead The maximum amount to copy into the result; defaults to -1 (presumably meaning no limit -- TODO confirm against the implementation).  | 
191  |  |      * @return a reference to result  | 
192  |  |      */  | 
193  |  |     UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;  | 
194  |  |  | 
195  |  |     /**  | 
196  |  |      * Advances the position by the given number of 16-bit code units.  | 
197  |  |      * This is useful in conjunction with the lookahead() method.  | 
198  |  |      * @param count the number of 16-bit code units to jump over  | 
199  |  |      */  | 
200  |  |     void jumpahead(int32_t count);  | 
201  |  |  | 
202  |  |     /**  | 
203  |  |      * (Disabled: the declaration below is commented out.)  Returns a string representation of this object, consisting of the  | 
204  |  |      * characters being iterated, with a '|' marking the current position.  | 
205  |  |      * Position within an expanded variable is <em>not</em> indicated.  | 
206  |  |      * @param result output parameter to receive a string  | 
207  |  |      * representation of this object  | 
208  |  |      */  | 
209  |  | //    UnicodeString& toString(UnicodeString& result) const;  | 
210  |  |       | 
211  |  | private:  | 
212  |  |     /**  | 
213  |  |      * Returns the current 32-bit code point without parsing escapes, parsing  | 
214  |  |      * variables, or skipping whitespace.  | 
215  |  |      * @return the current 32-bit code point  | 
216  |  |      */  | 
217  |  |     UChar32 _current() const;  | 
218  |  |       | 
219  |  |     /**  | 
220  |  |      * Advances the position by the given amount.  | 
221  |  |      * @param count the number of 16-bit code units to advance past  | 
222  |  |      */  | 
223  |  |     void _advance(int32_t count);  | 
224  |  | };  | 
225  |  |  | 
226  | 0  | inline UBool RuleCharacterIterator::inVariable() const { | 
227  | 0  |     return !(buf == 0);  /* buf is 0 unless a variable expansion is current */  | 
228  | 0  | }  | 
229  |  |  | 
230  |  | U_NAMESPACE_END  | 
231  |  |  | 
232  |  | #endif // _RULEITER_H_  | 
233  |  | //eof  |