/src/icu/source/i18n/string_segment.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2018 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | #include "unicode/utypes.h"  | 
5  |  |  | 
6  |  | #if !UCONFIG_NO_FORMATTING  | 
7  |  |  | 
8  |  | // Allow implicit conversion from char16_t* to UnicodeString for this file:  | 
9  |  | // Helpful in toString methods and elsewhere.  | 
10  |  | #define UNISTR_FROM_STRING_EXPLICIT  | 
11  |  |  | 
12  |  | #include "numparse_types.h"  | 
13  |  | #include "string_segment.h"  | 
14  |  | #include "putilimp.h"  | 
15  |  | #include "unicode/utf16.h"  | 
16  |  | #include "unicode/uniset.h"  | 
17  |  |  | 
18  |  | U_NAMESPACE_BEGIN  | 
19  |  |  | 
20  |  |  | 
21  |  | StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)  | 
22  | 0  |         : fStr(str), fStart(0), fEnd(str.length()),  | 
23  | 0  |           fFoldCase(ignoreCase) {} | 
24  |  |  | 
25  | 0  | int32_t StringSegment::getOffset() const { | 
26  | 0  |     return fStart;  | 
27  | 0  | }  | 
28  |  |  | 
29  | 0  | void StringSegment::setOffset(int32_t start) { | 
30  | 0  |     fStart = start;  | 
31  | 0  | }  | 
32  |  |  | 
33  | 0  | void StringSegment::adjustOffset(int32_t delta) { | 
34  | 0  |     fStart += delta;  | 
35  | 0  | }  | 
36  |  |  | 
37  | 0  | void StringSegment::adjustOffsetByCodePoint() { | 
38  | 0  |     fStart += U16_LENGTH(getCodePoint());  | 
39  | 0  | }  | 
40  |  |  | 
41  | 0  | void StringSegment::setLength(int32_t length) { | 
42  | 0  |     fEnd = fStart + length;  | 
43  | 0  | }  | 
44  |  |  | 
45  | 0  | void StringSegment::resetLength() { | 
46  | 0  |     fEnd = fStr.length();  | 
47  | 0  | }  | 
48  |  |  | 
49  | 0  | int32_t StringSegment::length() const { | 
50  | 0  |     return fEnd - fStart;  | 
51  | 0  | }  | 
52  |  |  | 
53  | 0  | char16_t StringSegment::charAt(int32_t index) const { | 
54  | 0  |     return fStr.charAt(index + fStart);  | 
55  | 0  | }  | 
56  |  |  | 
57  | 0  | UChar32 StringSegment::codePointAt(int32_t index) const { | 
58  | 0  |     return fStr.char32At(index + fStart);  | 
59  | 0  | }  | 
60  |  |  | 
61  | 0  | UnicodeString StringSegment::toUnicodeString() const { | 
62  | 0  |     return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart);  | 
63  | 0  | }  | 
64  |  |  | 
65  | 0  | const UnicodeString StringSegment::toTempUnicodeString() const { | 
66  |  |     // Use the readonly-aliasing constructor for efficiency.  | 
67  | 0  |     return UnicodeString(FALSE, fStr.getBuffer() + fStart, fEnd - fStart);  | 
68  | 0  | }  | 
69  |  |  | 
70  | 0  | UChar32 StringSegment::getCodePoint() const { | 
71  | 0  |     char16_t lead = fStr.charAt(fStart);  | 
72  | 0  |     if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) { | 
73  | 0  |         return fStr.char32At(fStart);  | 
74  | 0  |     } else if (U16_IS_SURROGATE(lead)) { | 
75  | 0  |         return -1;  | 
76  | 0  |     } else { | 
77  | 0  |         return lead;  | 
78  | 0  |     }  | 
79  | 0  | }  | 
80  |  |  | 
81  | 0  | bool StringSegment::startsWith(UChar32 otherCp) const { | 
82  | 0  |     return codePointsEqual(getCodePoint(), otherCp, fFoldCase);  | 
83  | 0  | }  | 
84  |  |  | 
85  | 0  | bool StringSegment::startsWith(const UnicodeSet& uniset) const { | 
86  |  |     // TODO: Move UnicodeSet case-folding logic here.  | 
87  |  |     // TODO: Handle string matches here instead of separately.  | 
88  | 0  |     UChar32 cp = getCodePoint();  | 
89  | 0  |     if (cp == -1) { | 
90  | 0  |         return false;  | 
91  | 0  |     }  | 
92  | 0  |     return uniset.contains(cp);  | 
93  | 0  | }  | 
94  |  |  | 
95  | 0  | bool StringSegment::startsWith(const UnicodeString& other) const { | 
96  | 0  |     if (other.isBogus() || other.length() == 0 || length() == 0) { | 
97  | 0  |         return false;  | 
98  | 0  |     }  | 
99  | 0  |     int cp1 = getCodePoint();  | 
100  | 0  |     int cp2 = other.char32At(0);  | 
101  | 0  |     return codePointsEqual(cp1, cp2, fFoldCase);  | 
102  | 0  | }  | 
103  |  |  | 
104  | 0  | int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) { | 
105  | 0  |     return getPrefixLengthInternal(other, fFoldCase);  | 
106  | 0  | }  | 
107  |  |  | 
108  | 0  | int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) { | 
109  | 0  |     return getPrefixLengthInternal(other, false);  | 
110  | 0  | }  | 
111  |  |  | 
112  | 0  | int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) { | 
113  | 0  |     U_ASSERT(other.length() > 0);  | 
114  | 0  |     int32_t offset = 0;  | 
115  | 0  |     for (; offset < uprv_min(length(), other.length());) { | 
116  |  |         // TODO: case-fold code points, not chars  | 
117  | 0  |         char16_t c1 = charAt(offset);  | 
118  | 0  |         char16_t c2 = other.charAt(offset);  | 
119  | 0  |         if (!codePointsEqual(c1, c2, foldCase)) { | 
120  | 0  |             break;  | 
121  | 0  |         }  | 
122  | 0  |         offset++;  | 
123  | 0  |     }  | 
124  | 0  |     return offset;  | 
125  | 0  | }  | 
126  |  |  | 
127  | 0  | bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) { | 
128  | 0  |     if (cp1 == cp2) { | 
129  | 0  |         return true;  | 
130  | 0  |     }  | 
131  | 0  |     if (!foldCase) { | 
132  | 0  |         return false;  | 
133  | 0  |     }  | 
134  | 0  |     cp1 = u_foldCase(cp1, TRUE);  | 
135  | 0  |     cp2 = u_foldCase(cp2, TRUE);  | 
136  | 0  |     return cp1 == cp2;  | 
137  | 0  | }  | 
138  |  |  | 
139  | 0  | bool StringSegment::operator==(const UnicodeString& other) const { | 
140  | 0  |     return toTempUnicodeString() == other;  | 
141  | 0  | }  | 
142  |  |  | 
143  |  |  | 
144  |  | U_NAMESPACE_END  | 
145  |  | #endif /* #if !UCONFIG_NO_FORMATTING */  |