Coverage Report

Created: 2025-06-13 06:38

/src/icu/icu4c/source/i18n/number_skeletons.h
Line
Count
Source (jump to first uncovered line)
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
#ifndef __SOURCE_NUMBER_SKELETONS_H__
8
#define __SOURCE_NUMBER_SKELETONS_H__
9
10
#include "number_types.h"
11
#include "numparse_types.h"
12
#include "unicode/ucharstrie.h"
13
#include "string_segment.h"
14
15
U_NAMESPACE_BEGIN
16
namespace number::impl {
17
18
// Forward-declaration
19
struct SeenMacroProps;
20
21
// namespace for enums and entrypoint functions
22
namespace skeleton {
23
24
////////////////////////////////////////////////////////////////////////////////////////
25
// NOTE: For examples of how to add a new stem to the number skeleton parser, see:    //
26
// https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
27
// and                                                                                //
28
// https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
29
////////////////////////////////////////////////////////////////////////////////////////
30
31
/**
32
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators fall into three sections (see the inline comments): a state in which no option
 * is expected, states in which an option may follow, and states in which an option must follow.
33
 */
34
enum ParseState {
35
36
    // Section 0: We expect whitespace or a stem, but not an option:
37
38
    STATE_NULL,
39
40
    // Section 1: We might accept an option, but it is not required:
41
42
    STATE_SCIENTIFIC,
43
    STATE_FRACTION_PRECISION,
44
    STATE_PRECISION,
45
46
    // Section 2: An option is required:
47
48
    STATE_INCREMENT_PRECISION,
49
    STATE_MEASURE_UNIT,
50
    STATE_PER_MEASURE_UNIT,
51
    STATE_IDENTIFIER_UNIT,
52
    STATE_UNIT_USAGE,
53
    STATE_CURRENCY_UNIT,
54
    STATE_INTEGER_WIDTH,
55
    STATE_NUMBERING_SYSTEM,
56
    STATE_SCALE,
57
};
58
59
/**
60
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
61
 * string literal written in upper snake case.
62
 *
 * The enumerators are split into two sections (see the inline comments): stems that do not
 * require an option, followed by stems that do require one.
 *
63
 * @see stem_to_object
64
 * @see #SERIALIZED_STEM_TRIE
65
 */
66
enum StemEnum {
67
68
    // Section 1: Stems that do not require an option:
69
70
    STEM_COMPACT_SHORT,
71
    STEM_COMPACT_LONG,
72
    STEM_SCIENTIFIC,
73
    STEM_ENGINEERING,
74
    STEM_NOTATION_SIMPLE,
75
    STEM_BASE_UNIT,
76
    STEM_PERCENT,
77
    STEM_PERMILLE,
78
    STEM_PERCENT_100, // concise-only
79
    STEM_PRECISION_INTEGER,
80
    STEM_PRECISION_UNLIMITED,
81
    STEM_PRECISION_CURRENCY_STANDARD,
82
    STEM_PRECISION_CURRENCY_CASH,
83
    STEM_ROUNDING_MODE_CEILING,
84
    STEM_ROUNDING_MODE_FLOOR,
85
    STEM_ROUNDING_MODE_DOWN,
86
    STEM_ROUNDING_MODE_UP,
87
    STEM_ROUNDING_MODE_HALF_EVEN,
88
    STEM_ROUNDING_MODE_HALF_ODD,
89
    STEM_ROUNDING_MODE_HALF_CEILING,
90
    STEM_ROUNDING_MODE_HALF_FLOOR,
91
    STEM_ROUNDING_MODE_HALF_DOWN,
92
    STEM_ROUNDING_MODE_HALF_UP,
93
    STEM_ROUNDING_MODE_UNNECESSARY,
94
    STEM_INTEGER_WIDTH_TRUNC,
95
    STEM_GROUP_OFF,
96
    STEM_GROUP_MIN2,
97
    STEM_GROUP_AUTO,
98
    STEM_GROUP_ON_ALIGNED,
99
    STEM_GROUP_THOUSANDS,
100
    STEM_LATIN,
101
    STEM_UNIT_WIDTH_NARROW,
102
    STEM_UNIT_WIDTH_SHORT,
103
    STEM_UNIT_WIDTH_FULL_NAME,
104
    STEM_UNIT_WIDTH_ISO_CODE,
105
    STEM_UNIT_WIDTH_FORMAL,
106
    STEM_UNIT_WIDTH_VARIANT,
107
    STEM_UNIT_WIDTH_HIDDEN,
108
    STEM_SIGN_AUTO,
109
    STEM_SIGN_ALWAYS,
110
    STEM_SIGN_NEVER,
111
    STEM_SIGN_ACCOUNTING,
112
    STEM_SIGN_ACCOUNTING_ALWAYS,
113
    STEM_SIGN_EXCEPT_ZERO,
114
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
115
    STEM_SIGN_NEGATIVE,
116
    STEM_SIGN_ACCOUNTING_NEGATIVE,
117
    STEM_DECIMAL_AUTO,
118
    STEM_DECIMAL_ALWAYS,
119
120
    // Section 2: Stems that DO require an option:
121
122
    STEM_PRECISION_INCREMENT,
123
    STEM_MEASURE_UNIT,
124
    STEM_PER_MEASURE_UNIT,
125
    STEM_UNIT,
126
    STEM_UNIT_USAGE,
127
    STEM_CURRENCY,
128
    STEM_INTEGER_WIDTH,
129
    STEM_NUMBERING_SYSTEM,
130
    STEM_SCALE,
131
};
132
133
/** Default wildcard char, accepted on input and printed in output */
134
constexpr char16_t kWildcardChar = u'*';
135
136
/** Alternative wildcard char, accepted on input but not printed in output */
137
constexpr char16_t kAltWildcardChar = u'+';
138
139
/**
 * Checks whether the char is a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar; false otherwise.
 */
140
651
inline bool isWildcardChar(char16_t c) {
141
651
    return c == kWildcardChar || c == kAltWildcardChar;
142
651
}
143
144
/**
145
 * Creates a NumberFormatter corresponding to the given skeleton string.
146
 *
147
 * @param skeletonString
148
 *            A number skeleton string, possibly not in its shortest form.
 * @param perror
 *            Pointer to a UParseError structure. NOTE(review): presumably receives the location of
 *            a skeleton syntax error; whether it may be null is not visible here -- confirm.
 * @param status
 *            ICU error code, passed by reference. NOTE(review): presumably set when the skeleton
 *            cannot be parsed -- confirm against the implementation.
149
 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
150
 */
151
UnlocalizedNumberFormatter create(
152
    const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
153
154
/**
155
 * Create a skeleton string corresponding to the given NumberFormatter.
156
 *
157
 * @param macros
158
 *            The NumberFormatter options object.
 * @param status
 *            ICU error code, passed by reference. NOTE(review): presumably set if generation
 *            fails -- confirm against the implementation.
159
 * @return A skeleton string in normalized form.
160
 */
161
UnicodeString generate(const MacroProps& macros, UErrorCode& status);
162
163
/**
164
 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
165
 *
166
 * Internal: use the create() endpoint instead of this function.
 *
 * @param skeletonString
 *            The number skeleton string to parse.
 * @param errOffset
 *            Non-const reference. NOTE(review): presumably an output receiving the offset into
 *            skeletonString at which parsing failed -- confirm against the implementation.
 * @param status
 *            ICU error code, passed by reference.
 * @return The MacroProps converted from the skeleton string.
167
 */
168
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
169
170
/**
171
 * Given that the current segment represents a stem, parse it and save the result.
172
 *
 * @param segment The current segment, which represents a stem.
 * @param stemTrie NOTE(review): presumably the trie used to look up the StemEnum value for the
 *                 stem text -- confirm against the implementation.
 * @param seen Per-property flags; see SeenMacroProps. NOTE(review): presumably used to detect a
 *             property being specified more than once -- confirm.
 * @param macros NOTE(review): presumably the object in which the parsed result is saved -- confirm.
 * @param status ICU error code, passed by reference.
173
 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
174
 */
175
ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
176
                     MacroProps& macros, UErrorCode& status);
177
178
/**
179
 * Given that the current segment represents an option, parse it and save the result.
180
 *
 * @param stem A ParseState value. NOTE(review): presumably the state returned by parseStem() or by
 *             the previous parseOption() call -- confirm against the caller.
 * @param segment The current segment, which represents an option.
 * @param macros NOTE(review): presumably the object in which the parsed result is saved -- confirm.
 * @param status ICU error code, passed by reference.
181
 * @return The next state after parsing this option, corresponding to what subset of options to
182
 *         expect next.
183
 */
184
ParseState
185
parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
186
187
} // namespace skeleton
188
189
190
/**
191
 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
192
 * applies to only the "Section 1" stems, those that are well-defined without an option.
 *
 * Each function takes a skeleton::StemEnum and returns a value of the type named by its signature.
 * NOTE(review): the behavior for a stem outside a function's own group is not visible in this
 * header -- confirm against the implementation.
193
 */
194
namespace stem_to_object {
195
196
Notation notation(skeleton::StemEnum stem);
197
198
MeasureUnit unit(skeleton::StemEnum stem);
199
200
Precision precision(skeleton::StemEnum stem);
201
202
UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
203
204
UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
205
206
UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
207
208
UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
209
210
UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
211
212
} // namespace stem_to_object
213
214
/**
215
 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
216
 * take place in the object_to_stem_string namespace.
 *
 * Each function takes the enum value and a UnicodeString reference named sb.
 * NOTE(review): presumably the stem string is appended to sb -- confirm against the implementation.
 * NOTE(review): this header declares no object_to_stem_string namespace; the generate* functions
 * in blueprint_helpers and the GeneratorHelpers class appear to fill that role -- confirm.
217
 */
218
namespace enum_to_stem_string {
219
220
void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
221
222
void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
223
224
void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
225
226
void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
227
228
void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
229
230
} // namespace enum_to_stem_string
231
232
/**
233
 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
 *
 * Signature conventions in this namespace: the parse* functions take the StringSegment to parse, a
 * non-const MacroProps reference and a UErrorCode reference; the generate* functions take the
 * values to serialize, a UnicodeString reference named sb and a UErrorCode reference.
 * NOTE(review): presumably parse* store their result in macros and generate* append to sb --
 * confirm against the implementation.
234
 */
235
namespace blueprint_helpers {
236
237
/** @return Whether we successfully found and parsed an exponent width option. */
238
bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
239
240
void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
241
242
/** @return Whether we successfully found and parsed an exponent sign option. */
243
bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
244
245
void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
246
247
void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
248
249
// "measure-unit/" is deprecated in favour of "unit/".
250
void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
251
252
// "per-measure-unit/" is deprecated in favour of "unit/".
253
void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
254
255
/**
256
 * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
257
 * specified via a "unit/" concise skeleton.
258
 */
259
void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
260
261
void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
262
263
void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
264
265
void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
266
267
void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
268
269
void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
270
271
void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
272
273
// Note: no generateScientificStem since this syntax was added later in ICU 67
274
275
void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
276
277
// Note: no generateIntegerStem since this syntax was added later in ICU 67
278
279
/** @return Whether we successfully found and parsed a frac-sig option. */
280
bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
281
282
/** @return Whether we successfully found and parsed a trailing zero option. */
283
bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
284
285
void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
286
287
void
288
generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status);
289
290
void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
291
292
void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
293
294
void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
295
296
void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
297
298
void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
299
300
void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
301
                              UErrorCode& status);
302
303
} // namespace blueprint_helpers
304
305
/**
306
 * Class for utility methods for generating a token corresponding to each macro-prop. Each private
307
 * method returns whether or not a token was written to the UnicodeString sb.
308
 *
309
 * This needs to be a class, not a namespace, so it can be friended.
 *
 * All members are static. generateSkeleton() is the only public member and, unlike the private
 * per-property helpers, returns void.
310
 */
311
class GeneratorHelpers {
312
  public:
313
    /**
314
     * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
315
     * UnicodeString (sb).
316
     *
317
     * Internal: use the skeleton::generate() endpoint instead of this function.
318
     */
319
    static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
320
321
  private:
322
    static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
323
324
    static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
325
326
    static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
327
328
    static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
329
330
    static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
331
332
    static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
333
334
    static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
335
336
    static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
337
338
    static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
339
340
    static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
341
342
    static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
343
344
    static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
345
346
};
347
348
/**
349
 * Struct for null-checking.
350
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one boolean flag per macro property, each initialized to false.
 * NOTE(review): presumably a flag is set to true once the corresponding property has been parsed
 * (the struct is passed by non-const reference to skeleton::parseStem) -- confirm against the
 * implementation.
351
 */
352
struct SeenMacroProps {
353
    bool notation = false;
354
    bool unit = false;
355
    bool perUnit = false;
356
    bool usage = false;
357
    bool precision = false;
358
    bool roundingMode = false;
359
    bool grouper = false;
360
    bool padder = false;
361
    bool integerWidth = false;
362
    bool symbols = false;
363
    bool unitWidth = false;
364
    bool sign = false;
365
    bool decimal = false;
366
    bool scale = false;
367
};
368
369
namespace {
// SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status)
//
// Calls (dest).appendInvariantChars() on the (end - start) code units that begin at
// (src).getBuffer() + (start), using a local UErrorCode for the conversion.
//   - If the conversion reports U_INVARIANT_CONVERSION_ERROR, status is set to
//     U_NUMBER_SKELETON_SYNTAX_ERROR and the enclosing function returns.
//   - If the conversion fails in any other way, that error code is copied to status and the
//     enclosing function returns.
//   - Otherwise status is left untouched and execution continues after the macro.
//
// Caveats that follow from the definition:
//   - The expansion contains bare `return;` statements, so it can only be used where a bare
//     `return;` is valid (e.g. inside a function returning void).
//   - The expansion is two statements ("(void)(dest);" followed by the block), so it must not be
//     used as the sole body of an unbraced if/else/loop.
//   - Being a preprocessor macro, it is not scoped by the enclosing unnamed namespace.
370
371
1.57k
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
372
1.57k
UPRV_BLOCK_MACRO_BEGIN { \
373
1.57k
    UErrorCode conversionStatus = U_ZERO_ERROR; \
374
1.57k
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
375
1.57k
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
376
20
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
377
20
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
378
20
        return; \
379
1.55k
    } else if (U_FAILURE(conversionStatus)) { \
380
0
        (status) = conversionStatus; \
381
0
        return; \
382
0
    } \
383
1.57k
} UPRV_BLOCK_MACRO_END
384
385
} // namespace
386
387
} // namespace number::impl
388
U_NAMESPACE_END
389
390
#endif //__SOURCE_NUMBER_SKELETONS_H__
391
#endif /* #if !UCONFIG_NO_FORMATTING */