Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/ruleiter.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2003-2011, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
* Author: Alan Liu
9
* Created: September 24 2003
10
* Since: ICU 2.8
11
**********************************************************************
12
*/
13
#ifndef _RULEITER_H_
14
#define _RULEITER_H_
15
16
#include "unicode/uobject.h"
17
18
U_NAMESPACE_BEGIN
19
20
class UnicodeString;
21
class ParsePosition;
22
class SymbolTable;
23
24
/**
25
 * An iterator that returns 32-bit code points.  This class is deliberately
26
 * <em>not</em> related to any of the ICU character iterator classes
27
 * in order to minimize complexity.
28
 * @author Alan Liu
29
 * @since ICU 2.8
30
 */
31
class RuleCharacterIterator : public UMemory {
32
33
    // TODO: Ideas for later.  (Do not implement if not needed, lest the
34
    // code coverage numbers go down due to unused methods.)
35
    // 1. Add a copy constructor, operator==() method.
36
    // 2. Rather than return DONE, throw an exception if the end
37
    // is reached -- this is an alternate usage model, probably not useful.
38
39
private:
40
    /**
41
     * Text being iterated.
42
     */    
43
    const UnicodeString& text;
44
45
    /**
46
     * Position of iterator.
47
     */
48
    ParsePosition& pos;
49
50
    /**
51
     * Symbol table used to parse and dereference variables.  May be 0.
52
     */
53
    const SymbolTable* sym;
54
    
55
    /**
56
     * Current variable expansion, or 0 if none.
57
     */
58
    const UnicodeString* buf;
59
60
    /**
61
     * Position within buf.  Meaningless if buf == 0.
62
     */
63
    int32_t bufPos;
64
65
public:
66
    /**
67
     * Value returned when there are no more characters to iterate.
68
     */
69
    enum { DONE = -1 };
70
71
    /**
72
     * Bitmask option to enable parsing of variable names.  If (options &
73
     * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
74
     * its value.  Variables are parsed using the SymbolTable API.
75
     */
76
    enum { PARSE_VARIABLES = 1 };
77
78
    /**
79
     * Bitmask option to enable parsing of escape sequences.  If (options &
80
     * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
81
     * to its value.  Escapes are parsed using Utility.unescapeAt().
82
     */
83
    enum { PARSE_ESCAPES   = 2 };
84
85
    /**
86
     * Bitmask option to enable skipping of whitespace.  If (options &
87
     * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
88
     * skipped, as if they were not present in the input.
89
     */
90
    enum { SKIP_WHITESPACE = 4 };
91
92
    /**
93
     * Constructs an iterator over the given text, starting at the given
94
     * position.
95
     * @param text the text to be iterated
96
     * @param sym the symbol table, or null if there is none.  If sym is null,
97
     * then variables will not be dereferenced, even if the PARSE_VARIABLES
98
     * option is set.
99
     * @param pos upon input, the index of the next character to return.  If a
100
     * variable has been dereferenced, then pos will <em>not</em> increment as
101
     * characters of the variable value are iterated.
102
     */
103
    RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
104
                          ParsePosition& pos);
105
    
106
    /**
107
     * Returns true if this iterator has no more characters to return.
108
     */
109
    UBool atEnd() const;
110
111
    /**
112
     * Returns the next character using the given options, or DONE if there
113
     * are no more characters, and advances the position to the next
114
     * character.
115
     * @param options one or more of the following options, bitwise-OR-ed
116
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
117
     * @param isEscaped output parameter set to true if the character
118
     * was escaped
119
     * @param ec input-output error code.  An error will only be set by
120
     * this routine if options includes PARSE_VARIABLES and an unknown
121
     * variable name is seen, or if options includes PARSE_ESCAPES and
122
     * an invalid escape sequence is seen.
123
     * @return the current 32-bit code point, or DONE
124
     */
125
    UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
126
127
    /**
128
     * Returns true if this iterator is currently within a variable expansion.
129
     */
130
    inline UBool inVariable() const;
131
132
    /**
133
     * An opaque object representing the position of a RuleCharacterIterator.
134
     */
135
    struct Pos : public UMemory {
136
    private:
137
        const UnicodeString* buf;
138
        int32_t pos;
139
        int32_t bufPos;
140
        friend class RuleCharacterIterator;
141
    };
142
143
    /**
144
     * Sets an object which, when later passed to setPos(), will
145
     * restore this iterator's position.  Usage idiom:
146
     *
147
     * RuleCharacterIterator iterator = ...;
148
     * RuleCharacterIterator::Pos pos;
149
     * iterator.getPos(pos);
150
     * for (;;) {
151
     *   iterator.getPos(pos);
152
     *   UChar32 c = iterator.next(...);
153
     *   ...
154
     * }
155
     * iterator.setPos(pos);
156
     *
157
     * @param p a position object to be set to this iterator's
158
     * current position.
159
     */
160
    void getPos(Pos& p) const;
161
162
    /**
163
     * Restores this iterator to the position it had when getPos()
164
     * set the given object.
165
     * @param p a position object previously set by getPos()
166
     */
167
    void setPos(const Pos& p);
168
169
    /**
170
     * Skips ahead past any ignored characters, as indicated by the given
171
     * options.  This is useful in conjunction with the lookahead() method.
172
     *
173
     * Currently, this only has an effect for SKIP_WHITESPACE.
174
     * @param options one or more of the following options, bitwise-OR-ed
175
     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
176
     */
177
    void skipIgnored(int32_t options);
178
179
    /**
180
     * Returns a string containing the remainder of the characters to be
181
     * returned by this iterator, without any option processing.  If the
182
     * iterator is currently within a variable expansion, this will only
183
     * extend to the end of the variable expansion.  This method is provided
184
     * so that iterators may interoperate with string-based APIs.  The typical
185
     * sequence of calls is to call skipIgnored(), then call lookahead(), then
186
     * parse the string returned by lookahead(), then call jumpahead() to
187
     * resynchronize the iterator.
188
     * @param result a string to receive the characters to be returned
189
     * by future calls to next()
190
     * @param maxLookAhead The maximum to copy into the result.
191
     * @return a reference to result
192
     */
193
    UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
194
195
    /**
196
     * Advances the position by the given number of 16-bit code units.
197
     * This is useful in conjunction with the lookahead() method.
198
     * @param count the number of 16-bit code units to jump over
199
     */
200
    void jumpahead(int32_t count);
201
202
    /**
203
     * Returns a string representation of this object, consisting of the
204
     * characters being iterated, with a '|' marking the current position.
205
     * Position within an expanded variable is <em>not</em> indicated.
206
     * @param result output parameter to receive a string
207
     * representation of this object
208
     */
209
//    UnicodeString& toString(UnicodeString& result) const;
210
    
211
private:
212
    /**
213
     * Returns the current 32-bit code point without parsing escapes, parsing
214
     * variables, or skipping whitespace.
215
     * @return the current 32-bit code point
216
     */
217
    UChar32 _current() const;
218
    
219
    /**
220
     * Advances the position by the given amount.
221
     * @param count the number of 16-bit code units to advance past
222
     */
223
    void _advance(int32_t count);
224
};
225
226
0
inline UBool RuleCharacterIterator::inVariable() const {  // true while within a variable expansion
227
0
    return buf != 0;  // buf holds the current variable expansion, or 0 if none
228
0
}
229
230
U_NAMESPACE_END
231
232
#endif // _RULEITER_H_
233
//eof