Coverage Report

Created: 2023-03-04 07:00

/src/icu/icu4c/source/common/ruleiter.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (c) 2003-2011, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
* Author: Alan Liu
9
* Created: September 24 2003
10
* Since: ICU 2.8
11
**********************************************************************
12
*/
13
#include "ruleiter.h"
14
#include "unicode/parsepos.h"
15
#include "unicode/symtable.h"
16
#include "unicode/unistr.h"
17
#include "unicode/utf16.h"
18
#include "patternprops.h"
19
20
/* \U87654321 or \ud800\udc00 */
21
122k
/* Upper bound, in UTF-16 code units, on the text that next() hands to
 * unescapeAt() after it has consumed a backslash. */
#define MAX_U_NOTATION_LEN 12
22
23
U_NAMESPACE_BEGIN
24
25
/**
 * Constructs an iterator over theText, using thePos as the parse position
 * and theSym (may be null) for variable lookups.  The iterator starts out
 * with no variable value buffer active.
 */
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText,
                                             const SymbolTable* theSym,
                                             ParsePosition& thePos)
    : text(theText),
      pos(thePos),
      sym(theSym),
      buf(nullptr),
      bufPos(0) {
}
33
34
24.8M
/**
 * Returns true when no variable value buffer is active and the parse
 * position index equals the length of the text.
 */
UBool RuleCharacterIterator::atEnd() const {
    if (buf != nullptr) {
        // Still reading characters out of a variable value.
        return false;
    }
    return pos.getIndex() == text.length();
}
37
38
32.8M
/**
 * Returns the next code point and advances past it, applying the processing
 * selected by the option bits.
 *
 * @param options   bitmask of PARSE_VARIABLES, SKIP_WHITESPACE and
 *                  PARSE_ESCAPES
 * @param isEscaped set to true if the returned code point was produced by a
 *                  backslash escape, otherwise false; not touched if ec
 *                  already indicates failure on entry
 * @param ec        error code; set to U_UNDEFINED_VARIABLE when a variable
 *                  reference cannot be resolved and to
 *                  U_MALFORMED_UNICODE_ESCAPE when an escape cannot be decoded
 * @return the code point read, or DONE on error.  An isolated SYMBOL_REF
 *         (one with no parsable name after it) is returned as-is.
 */
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    isEscaped = false;

    // None of these change while the loop runs, so evaluate them once.
    const bool expandVariables = (options & PARSE_VARIABLES) != 0 && sym != nullptr;
    const bool skipWhiteSpace = (options & SKIP_WHITESPACE) != 0;
    const bool parseEscapes = (options & PARSE_ESCAPES) != 0;

    while (true) {
        UChar32 ch = _current();
        _advance(U16_LENGTH(ch));

        // Variable references are only recognized while reading the main text.
        if (expandVariables && buf == nullptr && ch == SymbolTable::SYMBOL_REF) {
            UnicodeString name = sym->parseReference(text, pos, text.length());
            if (name.length() == 0) {
                // Isolated SYMBOL_REF: hand it back; the caller must be
                // prepared for this.
                return ch;
            }
            bufPos = 0;
            buf = sym->lookup(name);
            if (buf == nullptr) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            if (buf->length() == 0) {
                // An empty variable value contributes nothing; keep reading
                // from the main text.
                buf = nullptr;
            }
            continue;
        }

        if (skipWhiteSpace && PatternProps::isWhiteSpace(ch)) {
            continue;
        }

        if (parseEscapes && ch == 0x5C /*'\\'*/) {
            // The backslash is already consumed; decode what follows it.
            UnicodeString escapeText;
            lookahead(escapeText, MAX_U_NOTATION_LEN);
            int32_t consumed = 0;
            ch = escapeText.unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = true;
            if (ch < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        return ch;
    }
}
90
91
15.1M
/**
 * Records the iterator's current state in p: the parse position index, the
 * active variable buffer pointer (if any), and the offset into that buffer.
 * The saved state can later be restored with setPos().
 */
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos = pos.getIndex();
    p.buf = buf;
    p.bufPos = bufPos;
}
96
97
7.74M
/**
 * Restores the iterator state from p: the parse position index, the
 * variable buffer pointer, and the offset into that buffer.
 */
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf = p.buf;
    bufPos = p.bufPos;
}
102
103
224k
/**
 * Advances past any run of pattern white space at the current position.
 * Does nothing unless the SKIP_WHITESPACE bit is set in options.
 *
 * @param options bitmask; only SKIP_WHITESPACE is examined here
 */
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    UChar32 ch = _current();
    while (PatternProps::isWhiteSpace(ch)) {
        _advance(U16_LENGTH(ch));
        ch = _current();
    }
}
112
113
225k
/**
 * Copies the text that lies ahead of the current position into result
 * without moving the iterator.  The text is taken from the active variable
 * buffer if there is one, otherwise from the main text.
 *
 * @param result       receives the extracted text
 * @param maxLookAhead maximum number of code units to extract; a negative
 *                     value is replaced by 0x7FFFFFFF
 * @return result
 */
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t limit = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;
    if (buf == nullptr) {
        text.extract(pos.getIndex(), limit, result);
    } else {
        buf->extract(bufPos, limit, result);
    }
    return result;
}
124
125
210k
/**
 * Moves the iterator forward by count code units.  This simply forwards to
 * _advance(); next() uses it to step over text obtained via lookahead().
 *
 * @param count number of code units to skip
 */
void RuleCharacterIterator::jumpahead(int32_t count) {
    this->_advance(count);
}
128
129
/*
130
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
131
    int32_t b = pos.getIndex();
132
    text.extract(0, b, result);
133
    return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
134
}
135
*/
136
137
32.9M
/**
 * Returns the code point at the current position without advancing.
 * Reads from the active variable buffer if there is one; otherwise reads
 * from the main text, yielding DONE when the parse position is at or past
 * the end of the text.
 */
UChar32 RuleCharacterIterator::_current() const {
    if (buf != nullptr) {
        return buf->char32At(bufPos);
    }
    const int32_t index = pos.getIndex();
    if (index >= text.length()) {
        return static_cast<UChar32>(DONE);
    }
    return text.char32At(index);
}
145
146
33.1M
/**
 * Moves the current position forward by count code units.
 *
 * While a variable value buffer is active the offset into that buffer is
 * advanced, and the buffer is dropped once the offset reaches or passes its
 * end so that reading resumes in the main text.  Otherwise the parse
 * position is advanced and clamped to the length of the text.
 *
 * @param count number of code units to advance by
 */
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf != nullptr) {
        bufPos += count;
        // Compare with >= rather than ==: if a caller-supplied count
        // overshoots the end of the buffer, an equality test would never
        // fire, leaving buf set with an out-of-range offset so that atEnd()
        // could never become true.  This mirrors the clamp applied to the
        // main text below.
        if (bufPos >= buf->length()) {
            buf = nullptr;
        }
    } else {
        pos.setIndex(pos.getIndex() + count);
        if (pos.getIndex() > text.length()) {
            pos.setIndex(text.length());
        }
    }
}
159
160
U_NAMESPACE_END
161
162
//eof