Coverage Report

Created: 2025-06-24 06:54

/src/icu/icu4c/source/common/util.h
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 **********************************************************************
5
 *   Copyright (c) 2001-2011, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 **********************************************************************
8
 *   Date        Name        Description
9
 *   11/19/2001  aliu        Creation.
10
 **********************************************************************
11
 */
12
13
#ifndef ICU_UTIL_H
14
#define ICU_UTIL_H
15
16
#include "charstr.h"
17
#include "unicode/unistr.h"
18
#include "unicode/uobject.h"
19
#include "unicode/utypes.h"
20
//--------------------------------------------------------------------
21
// class ICU_Utility
22
// i18n utility functions, scoped into the class ICU_Utility.
23
//--------------------------------------------------------------------
24
25
U_NAMESPACE_BEGIN
26
27
class UnicodeMatcher;
28
29
class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
30
 public:
31
32
    /**
33
     * Append a number to the given UnicodeString in the given radix.
34
     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
35
     * radices 11 through 36.
36
     * @param result the digits of the number are appended here
37
     * @param n the number to be converted to digits; may be negative.
38
     * If negative, a '-' is prepended to the digits.
39
     * @param radix a radix from 2 to 36 inclusive.
     * Defaults to 10.
40
     * @param minDigits the minimum number of digits, not including
41
     * any '-', to produce.  Values less than 2 have no effect.  One
42
     * digit is always emitted regardless of this parameter.
     * Defaults to 1.
43
     * @return a reference to result
44
     */
45
    static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
46
                                       int32_t radix = 10,
47
                                       int32_t minDigits = 1);
48
49
    /**
     * Returns a bogus UnicodeString by value.
     * The string is default-constructed and then marked with setToBogus().
     * @return the bogus string
     */
50
1.49M
    static inline UnicodeString makeBogusString() {
51
1.49M
        UnicodeString result;
52
1.49M
        // Flag the freshly constructed string as bogus before handing it back.
        result.setToBogus();
53
1.49M
        return result;
54
1.49M
    }
55
56
    /**
57
     * Return true if the character is NOT printable ASCII.
58
     * The tab, newline and linefeed characters are considered unprintable.
     * @param c the code point to test
     * @return true if c is not printable ASCII
59
     */
60
    static UBool isUnprintable(UChar32 c);
61
62
    /**
     * Reports whether the given code point should always be escaped.
     * @param c the code point to test
63
     * @return true for control codes and for surrogate and noncharacter code points
64
     */
65
    static UBool shouldAlwaysBeEscaped(UChar32 c);
66
67
    /**
68
     * Escapes one unprintable code point using \uxxxx notation for U+0000 to
69
     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
70
     * printable ASCII, then do nothing and return false.  Otherwise,
71
     * append the escaped notation and return true.
     * @param result the escaped notation is appended here when c is
     * not printable ASCII
     * @param c the code point to examine
     * @return true if the escaped notation was appended; false if c is
     * printable ASCII and nothing was done
72
     */
73
    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
74
75
    /**
76
     * Escapes one code point using \uxxxx notation
77
     * for U+0000 to U+FFFF and \Uxxxxxxxx for U+10000 and above.
     * @param result the string that receives the escaped notation
     * (presumably appended, as in escapeUnprintable -- confirm against
     * the implementation)
     * @param c the code point to escape
78
     * @return result
79
     */
80
    static UnicodeString &escape(UnicodeString& result, UChar32 c);
81
82
    /**
83
     * Returns the index of a character, ignoring quoted text.
84
     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
85
     * found by a search for 'h'.
86
     * @param text text to be searched
87
     * @param start the beginning index, inclusive; <code>0 <= start
88
     * <= limit</code>.
89
     * @param limit the ending index, exclusive; <code>start <= limit
90
     * <= text.length()</code>.
91
     * @param c character to search for
92
     * @return Offset of the first instance of c, or -1 if not found.
93
     */
94
//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
95
//    static int32_t quotedIndexOf(const UnicodeString& text,
96
//                                 int32_t start, int32_t limit,
97
//                                 char16_t c);
98
99
    /**
100
     * Skip over a sequence of zero or more white space characters at pos.
     * @param str the string to scan
     * @param pos the index at which to start scanning; it is updated
     * only when advance is true
101
     * @param advance if true, advance pos to the first non-white-space
102
     * character at or after pos, or str.length(), if there is none.
103
     * Otherwise leave pos unchanged.  Defaults to false.
104
     * @return the index of the first non-white-space character at or
105
     * after pos, or str.length(), if there is none.
106
     */
107
    static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
108
                                  UBool advance = false);
109
110
    /**
111
     * Skip over Pattern_White_Space in a Replaceable.
112
     * Skipping may be done in the forward or
113
     * reverse direction.  In either case, the leftmost index will be
114
     * inclusive, and the rightmost index will be exclusive.  That is,
115
     * given a range defined as [start, limit), the call
116
     * skipWhitespace(text, start, limit) will advance start past leading
117
     * whitespace, whereas the call skipWhitespace(text, limit, start),
118
     * will back up limit past trailing whitespace.
119
     * @param text the text to be analyzed
120
     * @param pos either the start or limit of a range of 'text', to skip
121
     * leading or trailing whitespace, respectively
122
     * @param stop either the limit or start of a range of 'text', to skip
123
     * leading or trailing whitespace, respectively
124
     * @return the new start or limit, depending on what was passed in to
125
     * 'pos'
126
     */
127
//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
128
//?    static int32_t skipWhitespace(const Replaceable& text,
129
//?                                  int32_t pos, int32_t stop);
130
131
    /**
132
     * Parse a single non-whitespace character 'ch', optionally
133
     * preceded by whitespace.
134
     * @param id the string to be parsed
135
     * @param pos INPUT-OUTPUT parameter.  On input, pos is the
136
     * offset of the first character to be parsed.  On output, pos
137
     * is the index after the last parsed character.  If the parse
138
     * fails, pos will be unchanged.
139
     * @param ch the non-whitespace character to be parsed.
140
     * @return true if 'ch' is seen preceded by zero or more
141
     * whitespace characters.
142
     */
143
    static UBool parseChar(const UnicodeString& id, int32_t& pos, char16_t ch);
144
145
    /**
146
     * Parse a pattern string starting at offset pos.  Keywords are
147
     * matched case-insensitively.  Spaces may be skipped and may be
148
     * optional or required.  Integer values may be parsed, and if
149
     * they are, they will be returned in the given array.  If
150
     * successful, the offset of the next non-space character is
151
     * returned.  On failure, -1 is returned.
     * @param rule the string to be parsed (presumably the text that is
     * matched against 'pattern' -- confirm against the implementation)
     * @param pos the offset in 'rule' at which parsing starts; passed by
     * value, so the caller's variable is not modified
     * @param limit NOTE(review): presumably the exclusive end offset of
     * the range of 'rule' to parse -- not stated in this header, confirm
152
     * @param pattern must only contain lowercase characters, which
153
     * will match their uppercase equivalents as well.  A space
154
     * character matches one or more required spaces.  A '~' character
155
     * matches zero or more optional spaces.  A '#' character matches
156
     * an integer and stores it in parsedInts, which the caller must
157
     * ensure has enough capacity.
158
     * @param parsedInts array to receive parsed integers.  Caller
159
     * must ensure that the array holds at least as many elements as
160
     * there are '#' signs in 'pattern'.
161
     * @return the position after the last character parsed, or -1 if
162
     * the parse failed
163
     */
164
    static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
165
                                const UnicodeString& pattern, int32_t* parsedInts);
166
        
167
    /**
168
     * Parse a pattern string within the given Replaceable and a parsing
169
     * pattern.  Characters are matched literally and case-sensitively
170
     * except for the following special characters:
171
     *
172
     * ~  zero or more Pattern_White_Space chars
173
     *
174
     * If end of pattern is reached with all matches along the way,
175
     * the first unparsed index is returned (index is passed by value,
     * so the caller's variable is not modified).
176
     * Otherwise -1 is returned.
177
     * @param pat pattern that controls parsing
178
     * @param text text to be parsed, starting at index
179
     * @param index offset to first character to parse
180
     * @param limit offset after last character to parse
181
     * @return index after last parsed character, or -1 on parse failure.
182
     */
183
    static int32_t parsePattern(const UnicodeString& pat,
184
                                const Replaceable& text,
185
                                int32_t index,
186
                                int32_t limit);
187
188
    /**
189
     * Parse an integer at pos, either of the form \d+ or of the form
190
     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
191
     * or octal format.
     * @param rule the string containing the integer to parse
192
     * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
193
     * character to parse.  On output, the index of the character after the
194
     * last parsed character.
     * @param limit NOTE(review): presumably the exclusive end offset of
     * the range of 'rule' to parse -- not stated in this header, confirm
     * @return the parsed integer.  NOTE(review): the result when no
     * digits are present or the value overflows is not documented here;
     * confirm against the implementation.
195
     */
196
    static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
197
198
    /**
199
     * Parse an integer at pos using only ASCII digits.
200
     * Base 10 only.
     * @param str the string containing the integer to parse
201
     * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
202
     * character to parse.  On output, the index of the character after the
203
     * last parsed character.
     * @return the parsed integer.  NOTE(review): the result when no
     * digits are present or the value overflows is not documented here;
     * confirm against the implementation.
204
     */
205
    static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
206
207
    /**
208
     * Parse a Unicode identifier from the given string at the given
209
     * position.  Return the identifier, or an empty string if there
210
     * is no identifier.
211
     * @param str the string to parse
212
     * @param pos INPUT-OUTPUT parameter.  On INPUT, pos is the
213
     * first character to examine.  It must be less than str.length(),
214
     * and it must not point to a whitespace character.  That is, must
215
     * have pos < str.length() and
216
     * !UCharacter::isWhitespace(str.char32At(pos)).  On
217
     * OUTPUT, the position after the last parsed character.
218
     * @return the Unicode identifier, or an empty string if there is
219
     * no valid identifier at pos.
220
     */
221
    static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
222
223
    /**
224
     * Parse an unsigned 31-bit integer at the given offset.  Use
225
     * UCharacter.digit() to parse individual characters into digits.
226
     * @param text the text to be parsed
227
     * @param pos INPUT-OUTPUT parameter.  On entry, pos is the
228
     * offset within text at which to start parsing; it should point
229
     * to a valid digit.  On exit, pos is the offset after the last
230
     * parsed character.  If the parse failed, it will be unchanged on
231
     * exit.  Must be >= 0 on entry.
232
     * @param radix the radix in which to parse; must be >= 2 and <=
233
     * 36.
234
     * @return a non-negative parsed number, or -1 upon parse failure.
235
     * Parse fails if there are no digits, that is, if pos does not
236
     * point to a valid digit on entry, or if the number to be parsed
237
     * does not fit into a 31-bit unsigned integer.
238
     */
239
    static int32_t parseNumber(const UnicodeString& text,
240
                               int32_t& pos, int8_t radix);
241
242
    /**
     * Appends a single code point to a rule string.
     * NOTE(review): only the declaration is visible in this header; the
     * parameter descriptions below are inferred from the parameter names
     * and must be confirmed against the implementation.
     * @param rule the rule string being built (presumably appended to)
     * @param c the code point to add
     * @param isLiteral presumably true if c is to be treated as literal
     * text
     * @param escapeUnprintable presumably true if unprintable characters
     * are to be written in escaped form
     * @param quoteBuf presumably a caller-owned buffer of pending quoted
     * text; it is passed by non-const reference, so it may be modified
     */
    static void appendToRule(UnicodeString& rule,
243
                             UChar32 c,
244
                             UBool isLiteral,
245
                             UBool escapeUnprintable,
246
                             UnicodeString& quoteBuf);
247
    
248
    /**
     * Appends a string to a rule string.
     * NOTE(review): only the declaration is visible in this header; the
     * parameter descriptions below are inferred from the parameter names
     * and must be confirmed against the implementation.
     * @param rule the rule string being built (presumably appended to)
     * @param text the text to add
     * @param isLiteral presumably true if text is to be treated as
     * literal text
     * @param escapeUnprintable presumably true if unprintable characters
     * are to be written in escaped form
     * @param quoteBuf presumably a caller-owned buffer of pending quoted
     * text; it is passed by non-const reference, so it may be modified
     */
    static void appendToRule(UnicodeString& rule,
249
                             const UnicodeString& text,
250
                             UBool isLiteral,
251
                             UBool escapeUnprintable,
252
                             UnicodeString& quoteBuf);
253
254
    /**
     * Appends a representation of a UnicodeMatcher to a rule string.
     * NOTE(review): only the declaration is visible in this header; the
     * parameter descriptions below are inferred from the parameter names
     * and must be confirmed against the implementation, including
     * whether a null matcher is accepted.
     * @param rule the rule string being built (presumably appended to)
     * @param matcher the matcher to add
     * @param escapeUnprintable presumably true if unprintable characters
     * are to be written in escaped form
     * @param quoteBuf presumably a caller-owned buffer of pending quoted
     * text; it is passed by non-const reference, so it may be modified
     */
    static void appendToRule(UnicodeString& rule,
255
                             const UnicodeMatcher* matcher,
256
                             UBool escapeUnprintable,
257
                             UnicodeString& quoteBuf);
258
259
private:
260
    // do not instantiate
    // The default constructor is deleted, so no ICU_Utility object can
    // be created; all functionality is exposed through static methods.
261
    ICU_Utility() = delete;
262
};
263
264
U_NAMESPACE_END
265
266
#endif
267
//eof