/src/icu/source/common/ruleiter.cpp

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2003-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
#include "ruleiter.h"
#include "unicode/parsepos.h"
#include "unicode/symtable.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#include "patternprops.h"

/* \U87654321 or \ud800\udc00 */
#define MAX_U_NOTATION_LEN 12

U_NAMESPACE_BEGIN

/**
 * Creates an iterator over theText.
 *
 * @param theText the rule text to iterate over; stored in the text member.
 * @param theSym  symbol table consulted by next() for variable references;
 *                stored in the sym member.  May be null: next() checks
 *                sym != 0 before using it.
 * @param thePos  parse position whose index is read and updated as the
 *                iterator advances; stored in the pos member.
 */
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
                      ParsePosition& thePos) :
    text(theText),
    pos(thePos),
    sym(theSym),
    buf(0),     // null buf means characters are read from text, not from a variable value
    bufPos(0)   // offset into buf; only meaningful while buf is non-null
{}

/**
 * Reports whether the iterator has consumed all of its input.
 *
 * @return true only when no variable value is currently being read
 *         (buf is null) and the parse position index equals the length
 *         of the text.
 */
UBool RuleCharacterIterator::atEnd() const {
    // A pending variable buffer always has characters left to deliver.
    if (buf != 0) {
        return FALSE;
    }
    return pos.getIndex() == text.length();
}

/**
 * Returns the next character according to the given option bits and
 * advances past it.
 *
 * @param options   bitmask of PARSE_VARIABLES, PARSE_ESCAPES and
 *                  SKIP_WHITESPACE.
 * @param isEscaped output; reset to FALSE on entry (unless ec is already a
 *                  failure) and set to TRUE once a backslash escape has
 *                  been processed.
 * @param ec        input-output error code.  Set to U_UNDEFINED_VARIABLE
 *                  when the symbol table has no value for a referenced
 *                  name, and to U_MALFORMED_UNICODE_ESCAPE when an escape
 *                  cannot be decoded.
 * @return the character read, or DONE if ec was already a failure on entry
 *         or one of the errors above occurred.
 */
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    // Neither the option bits nor sym change during this call, so decode
    // them once up front.
    const bool expandVariables = sym != 0 && (options & PARSE_VARIABLES) != 0;
    const bool dropWhiteSpace = (options & SKIP_WHITESPACE) != 0;
    const bool decodeEscapes = (options & PARSE_ESCAPES) != 0;

    isEscaped = FALSE;

    for (;;) {
        UChar32 ch = _current();
        _advance(U16_LENGTH(ch));

        // Variable references are only recognized while reading from the
        // rule text itself (buf == 0), not inside a variable's value.
        if (expandVariables && buf == 0 && ch == SymbolTable::SYMBOL_REF) {
            UnicodeString varName = sym->parseReference(text, pos, text.length());
            if (varName.length() == 0) {
                // Isolated SYMBOL_REF: hand it back as an ordinary
                // character.  Caller must be prepared for this.
                return ch;
            }
            bufPos = 0;
            buf = sym->lookup(varName);
            if (buf == 0) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            if (buf->length() == 0) {
                // An empty value contributes nothing; resume with the text.
                buf = 0;
            }
            continue;
        }

        if (dropWhiteSpace && PatternProps::isWhiteSpace(ch)) {
            continue;
        }

        if (decodeEscapes && ch == 0x5C /*'\\'*/) {
            // The backslash itself has been consumed; decode what follows
            // it and skip exactly as many code units as the escape used.
            UnicodeString escapeText;
            lookahead(escapeText, MAX_U_NOTATION_LEN);
            int32_t consumed = 0;
            ch = escapeText.unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = TRUE;
            if (ch < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        return ch;
    }
}

/**
 * Captures the iterator's current state so it can later be restored
 * with setPos().
 *
 * @param p receives the parse position index, the active variable buffer
 *          pointer (possibly null) and the offset into that buffer.
 */
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos = pos.getIndex();
    p.buf = buf;
    p.bufPos = bufPos;
}

/**
 * Restores iterator state from a snapshot.
 *
 * @param p snapshot whose index is written back to the parse position and
 *          whose buffer pointer and buffer offset replace the current ones.
 */
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf = p.buf;
    bufPos = p.bufPos;
}

/**
 * Advances past any run of pattern white space at the current position.
 *
 * @param options option bitmask; nothing happens unless SKIP_WHITESPACE
 *                is set.
 */
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    // Stop on the first character that is not pattern white space,
    // leaving it unconsumed.
    UChar32 ch = _current();
    while (PatternProps::isWhiteSpace(ch)) {
        _advance(U16_LENGTH(ch));
        ch = _current();
    }
}

/**
 * Extracts upcoming input into result without moving the iterator.
 *
 * The data comes from the active variable buffer when there is one,
 * otherwise from the rule text at the current parse position; the two
 * sources are never combined.
 *
 * @param result       receives the extracted string.
 * @param maxLookAhead maximum length to extract; a negative value is
 *                     replaced by 0x7FFFFFFF.
 * @return a reference to result.
 */
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t limit = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;
    if (buf == 0) {
        text.extract(pos.getIndex(), limit, result);
    } else {
        buf->extract(bufPos, limit, result);
    }
    return result;
}

/**
 * Advances the iterator by count units by forwarding to _advance().
 * next() uses this after lookahead() to skip the units consumed by an
 * escape sequence.
 *
 * @param count amount to advance, passed unchanged to _advance().
 */
void RuleCharacterIterator::jumpahead(int32_t count) {
    _advance(count);
}

/*
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    int32_t b = pos.getIndex();
    text.extract(0, b, result);
    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
}
*/

/**
 * Returns the code point at the current position without advancing.
 *
 * @return the code point at bufPos in the active variable buffer if there
 *         is one; otherwise the code point at the parse position index in
 *         the text, or DONE when that index is at or beyond the end of
 *         the text.
 */
UChar32 RuleCharacterIterator::_current() const {
    if (buf != 0) {
        return buf->char32At(bufPos);
    }
    const int32_t index = pos.getIndex();
    if (index >= text.length()) {
        return (UChar32)DONE;
    }
    return text.char32At(index);
}

/**
 * Moves the iterator forward by count units.
 *
 * While a variable buffer is active the offset into that buffer is
 * advanced, and the buffer is dropped once the offset reaches or passes
 * its end so that reading resumes from the rule text.  Otherwise the
 * parse position index is advanced and clamped to the length of the text.
 *
 * @param count number of units to advance by.
 */
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf != 0) {
        bufPos += count;
        // Use >= rather than ==: if a caller overshoots the end of the
        // buffer, an equality test would never fire and buf would stay
        // set with bufPos past its end.  This mirrors the clamping done
        // for the text below.
        if (bufPos >= buf->length()) {
            buf = 0;
        }
    } else {
        int32_t target = pos.getIndex() + count;
        const int32_t limit = text.length();
        if (target > limit) {
            // Never move the parse position beyond the end of the text.
            target = limit;
        }
        pos.setIndex(target);
    }
}

U_NAMESPACE_END

//eof

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		**********************************************************************
5		* Copyright (c) 2003-2011, International Business Machines
6		* Corporation and others. All Rights Reserved.
7		**********************************************************************
8		* Author: Alan Liu
9		* Created: September 24 2003
10		* Since: ICU 2.8
11		**********************************************************************
12		*/
13		#include "ruleiter.h"
14		#include "unicode/parsepos.h"
15		#include "unicode/symtable.h"
16		#include "unicode/unistr.h"
17		#include "unicode/utf16.h"
18		#include "patternprops.h"
19
20		/* \U87654321 or \ud800\udc00 */
21	0	#define MAX_U_NOTATION_LEN 12
22
23		U_NAMESPACE_BEGIN
24
25		RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
26		ParsePosition& thePos) :
27	0	text(theText),
28	0	pos(thePos),
29	0	sym(theSym),
30	0	buf(0),
31	0	bufPos(0)
32	0	{}
33
34	0	UBool RuleCharacterIterator::atEnd() const {
35	0	return buf == 0 && pos.getIndex() == text.length();
36	0	}
37
38	0	UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
39	0	if (U_FAILURE(ec)) return DONE;
40
41	0	UChar32 c = DONE;
42	0	isEscaped = FALSE;
43
44	0	for (;;) {
45	0	c = _current();
46	0	_advance(U16_LENGTH(c));
47
48	0	if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
49	0	(options & PARSE_VARIABLES) != 0 && sym != 0) {
50	0	UnicodeString name = sym->parseReference(text, pos, text.length());
51		// If name is empty there was an isolated SYMBOL_REF;
52		// return it. Caller must be prepared for this.
53	0	if (name.length() == 0) {
54	0	break;
55	0	}
56	0	bufPos = 0;
57	0	buf = sym->lookup(name);
58	0	if (buf == 0) {
59	0	ec = U_UNDEFINED_VARIABLE;
60	0	return DONE;
61	0	}
62		// Handle empty variable value
63	0	if (buf->length() == 0) {
64	0	buf = 0;
65	0	}
66	0	continue;
67	0	}
68
69	0	if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
70	0	continue;
71	0	}
72
73	0	if (c == 0x5C /'\\'/ && (options & PARSE_ESCAPES) != 0) {
74	0	UnicodeString tempEscape;
75	0	int32_t offset = 0;
76	0	c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
77	0	jumpahead(offset);
78	0	isEscaped = TRUE;
79	0	if (c < 0) {
80	0	ec = U_MALFORMED_UNICODE_ESCAPE;
81	0	return DONE;
82	0	}
83	0	}
84
85	0	break;
86	0	}
87
88	0	return c;
89	0	}
90
91	0	void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
92	0	p.buf = buf;
93	0	p.pos = pos.getIndex();
94	0	p.bufPos = bufPos;
95	0	}
96
97	0	void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
98	0	buf = p.buf;
99	0	pos.setIndex(p.pos);
100	0	bufPos = p.bufPos;
101	0	}
102
103	0	void RuleCharacterIterator::skipIgnored(int32_t options) {
104	0	if ((options & SKIP_WHITESPACE) != 0) {
105	0	for (;;) {
106	0	UChar32 a = _current();
107	0	if (!PatternProps::isWhiteSpace(a)) break;
108	0	_advance(U16_LENGTH(a));
109	0	}
110	0	}
111	0	}
112
113	0	UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
114	0	if (maxLookAhead < 0) {
115	0	maxLookAhead = 0x7FFFFFFF;
116	0	}
117	0	if (buf != 0) {
118	0	buf->extract(bufPos, maxLookAhead, result);
119	0	} else {
120	0	text.extract(pos.getIndex(), maxLookAhead, result);
121	0	}
122	0	return result;
123	0	}
124
125	0	void RuleCharacterIterator::jumpahead(int32_t count) {
126	0	_advance(count);
127	0	}
128
129		/*
130		UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
131		int32_t b = pos.getIndex();
132		text.extract(0, b, result);
133		return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '\|' at index
134		}
135		*/
136
137	0	UChar32 RuleCharacterIterator::_current() const {
138	0	if (buf != 0) {
139	0	return buf->char32At(bufPos);
140	0	} else {
141	0	int i = pos.getIndex();
142	0	return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
143	0	}
144	0	}
145
146	0	void RuleCharacterIterator::_advance(int32_t count) {
147	0	if (buf != 0) {
148	0	bufPos += count;
149	0	if (bufPos == buf->length()) {
150	0	buf = 0;
151	0	}
152	0	} else {
153	0	pos.setIndex(pos.getIndex() + count);
154	0	if (pos.getIndex() > text.length()) {
155	0	pos.setIndex(text.length());
156	0	}
157	0	}
158	0	}
159
160		U_NAMESPACE_END
161
162		//eof

Coverage Report

Created: 2025-06-24 06:43