Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/number_skeletons.h
Line
Count
Source (jump to first uncovered line)
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
#ifndef __SOURCE_NUMBER_SKELETONS_H__
8
#define __SOURCE_NUMBER_SKELETONS_H__
9
10
#include "number_types.h"
11
#include "numparse_types.h"
12
#include "unicode/ucharstrie.h"
13
#include "string_segment.h"
14
15
U_NAMESPACE_BEGIN
16
namespace number {
17
namespace impl {
18
19
// Forward-declaration
20
struct SeenMacroProps;
21
22
// namespace for enums and entrypoint functions
23
namespace skeleton {
24
25
////////////////////////////////////////////////////////////////////////////////////////
26
// NOTE: For examples of how to add a new stem to the number skeleton parser, see:    //
27
// https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
28
// and                                                                                //
29
// https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
30
////////////////////////////////////////////////////////////////////////////////////////
31
32
/**
33
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * It is the return type of parseStem() and parseOption(); parseOption() additionally receives
 * the state left by the preceding stem as its first argument.
 *
 * NOTE(review): the "Section 2" states appear to line up one-to-one with the "Section 2" stems
 * of StemEnum (both list nine entries) - confirm against the parser before adding a state.
34
 */
35
enum ParseState {
36
37
    // Section 0: We expect whitespace or a stem, but not an option:
38
39
    STATE_NULL,
40
41
    // Section 1: We might accept an option, but it is not required:
42
43
    STATE_SCIENTIFIC,
44
    STATE_FRACTION_PRECISION,
45
    STATE_PRECISION,
46
47
    // Section 2: An option is required:
48
49
    STATE_INCREMENT_PRECISION,
50
    STATE_MEASURE_UNIT,
51
    STATE_PER_MEASURE_UNIT,
52
    STATE_IDENTIFIER_UNIT,
53
    STATE_UNIT_USAGE,
54
    STATE_CURRENCY_UNIT,
55
    STATE_INTEGER_WIDTH,
56
    STATE_NUMBERING_SYSTEM,
57
    STATE_SCALE,
58
};
59
60
/**
61
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
62
 * string literal written in upper snake case.
 * For example, following that rule, STEM_COMPACT_SHORT stands for the stem "compact-short".
63
 *
64
 * @see stem_to_object
65
 * @see #SERIALIZED_STEM_TRIE
66
 */
67
enum StemEnum {
68
69
    // Section 1: Stems that do not require an option:
70
71
    STEM_COMPACT_SHORT,
72
    STEM_COMPACT_LONG,
73
    STEM_SCIENTIFIC,
74
    STEM_ENGINEERING,
75
    STEM_NOTATION_SIMPLE,
76
    STEM_BASE_UNIT,
77
    STEM_PERCENT,
78
    STEM_PERMILLE,
79
    STEM_PERCENT_100, // concise-only
80
    STEM_PRECISION_INTEGER,
81
    STEM_PRECISION_UNLIMITED,
82
    STEM_PRECISION_CURRENCY_STANDARD,
83
    STEM_PRECISION_CURRENCY_CASH,
84
    STEM_ROUNDING_MODE_CEILING,
85
    STEM_ROUNDING_MODE_FLOOR,
86
    STEM_ROUNDING_MODE_DOWN,
87
    STEM_ROUNDING_MODE_UP,
88
    STEM_ROUNDING_MODE_HALF_EVEN,
89
    STEM_ROUNDING_MODE_HALF_ODD,
90
    STEM_ROUNDING_MODE_HALF_CEILING,
91
    STEM_ROUNDING_MODE_HALF_FLOOR,
92
    STEM_ROUNDING_MODE_HALF_DOWN,
93
    STEM_ROUNDING_MODE_HALF_UP,
94
    STEM_ROUNDING_MODE_UNNECESSARY,
95
    STEM_INTEGER_WIDTH_TRUNC,
96
    STEM_GROUP_OFF,
97
    STEM_GROUP_MIN2,
98
    STEM_GROUP_AUTO,
99
    STEM_GROUP_ON_ALIGNED,
100
    STEM_GROUP_THOUSANDS,
101
    STEM_LATIN,
102
    STEM_UNIT_WIDTH_NARROW,
103
    STEM_UNIT_WIDTH_SHORT,
104
    STEM_UNIT_WIDTH_FULL_NAME,
105
    STEM_UNIT_WIDTH_ISO_CODE,
106
    STEM_UNIT_WIDTH_FORMAL,
107
    STEM_UNIT_WIDTH_VARIANT,
108
    STEM_UNIT_WIDTH_HIDDEN,
109
    STEM_SIGN_AUTO,
110
    STEM_SIGN_ALWAYS,
111
    STEM_SIGN_NEVER,
112
    STEM_SIGN_ACCOUNTING,
113
    STEM_SIGN_ACCOUNTING_ALWAYS,
114
    STEM_SIGN_EXCEPT_ZERO,
115
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
116
    STEM_SIGN_NEGATIVE,
117
    STEM_SIGN_ACCOUNTING_NEGATIVE,
118
    STEM_DECIMAL_AUTO,
119
    STEM_DECIMAL_ALWAYS,
120
121
    // Section 2: Stems that DO require an option:
122
123
    STEM_PRECISION_INCREMENT,
124
    STEM_MEASURE_UNIT,
125
    STEM_PER_MEASURE_UNIT,
126
    STEM_UNIT,
127
    STEM_UNIT_USAGE,
128
    STEM_CURRENCY,
129
    STEM_INTEGER_WIDTH,
130
    STEM_NUMBERING_SYSTEM,
131
    STEM_SCALE,
132
};
133
134
/** Default wildcard char ('*'), accepted on input (see isWildcardChar) and printed in output */
135
constexpr char16_t kWildcardChar = u'*';
136
137
/** Alternative wildcard char ('+'), accepted on input but not printed in output */
138
constexpr char16_t kAltWildcardChar = u'+';
139
140
/**
 * Checks whether the char is a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar; false otherwise.
 */
141
0
inline bool isWildcardChar(char16_t c) {
142
0
    return c == kWildcardChar || c == kAltWildcardChar;
143
0
}
144
145
/**
146
 * Creates a NumberFormatter corresponding to the given skeleton string.
147
 *
148
 * @param skeletonString
149
 *            A number skeleton string, possibly not in its shortest form.
150
 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
151
 */
152
UnlocalizedNumberFormatter create(
153
    const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
154
155
/**
156
 * Create a skeleton string corresponding to the given NumberFormatter.
157
 *
158
 * @param macros
159
 *            The NumberFormatter options object.
160
 * @return A skeleton string in normalized form.
161
 */
162
UnicodeString generate(const MacroProps& macros, UErrorCode& status);
163
164
/**
165
 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
166
 *
167
 * Internal: use the create() endpoint instead of this function.
168
 */
169
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
170
171
/**
172
 * Given that the current segment represents a stem, parse it and save the result.
173
 *
174
 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
175
 */
176
ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
177
                     MacroProps& macros, UErrorCode& status);
178
179
/**
180
 * Given that the current segment represents an option, parse it and save the result.
181
 *
182
 * @return The next state after parsing this option, corresponding to what subset of options to
183
 *         expect next.
184
 */
185
ParseState
186
parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
187
188
} // namespace skeleton
189
190
191
/**
192
 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
193
 * applies to only the "Section 1" stems, those that are well-defined without an option.
194
 */
195
namespace stem_to_object {
196
197
Notation notation(skeleton::StemEnum stem);
198
199
MeasureUnit unit(skeleton::StemEnum stem);
200
201
Precision precision(skeleton::StemEnum stem);
202
203
UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
204
205
UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
206
207
UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
208
209
UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
210
211
UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
212
213
} // namespace stem_to_object
214
215
/**
216
 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
217
 * take place in the object_to_stem_string namespace.
218
 */
219
namespace enum_to_stem_string {
220
221
void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
222
223
void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
224
225
void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
226
227
void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
228
229
void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
230
231
} // namespace enum_to_stem_string
232
233
/**
234
 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
235
 */
236
namespace blueprint_helpers {
237
238
/** @return Whether we successfully found and parsed an exponent width option. */
239
bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
240
241
void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
242
243
/** @return Whether we successfully found and parsed an exponent sign option. */
244
bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
245
246
void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
247
248
void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
249
250
// "measure-unit/" is deprecated in favour of "unit/".
251
void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
252
253
// "per-measure-unit/" is deprecated in favour of "unit/".
254
void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
255
256
/**
257
 * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
258
 * specified via a "unit/" concise skeleton.
259
 */
260
void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
261
262
void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
263
264
void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
265
266
void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
267
268
void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
269
270
void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
271
272
void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
273
274
// Note: no generateScientificStem since this syntax was added later in ICU 67
275
276
void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
277
278
// Note: no generateIntegerStem since this syntax was added later in ICU 67
279
280
/** @return Whether we successfully found and parsed a frac-sig option. */
281
bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
282
283
/** @return Whether we successfully found and parsed a trailing zero option. */
284
bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
285
286
void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
287
288
void
289
generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
290
291
void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
292
293
void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
294
295
void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
296
297
void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
298
299
void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
300
301
void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
302
                              UErrorCode& status);
303
304
} // namespace blueprint_helpers
305
306
/**
306
 * Class for utility methods for generating a token corresponding to each macro-prop. Each private
307
 * helper returns whether or not a token was written to the UnicodeString being built (sb).
308
 *
309
 * This needs to be a class, not a namespace, so it can be friended.
310
 */
311
class GeneratorHelpers {
312
  public:
313
    /**
314
     * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
315
     * UnicodeString (sb).
316
     *
317
     * Internal: use the skeleton::generate() endpoint instead of this function.
     * NOTE(review): this note previously named create(), which takes a skeleton string rather
     * than a MacroProps; generate() is presumably the intended endpoint - confirm.
318
     */
319
    static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
320
321
  private:  // One helper per macro-prop; all share the (macros, sb, status) parameter list.
322
    static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
323
324
    static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
325
326
    static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
327
328
    static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
329
330
    static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
331
332
    static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
333
334
    static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
335
336
    static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
337
338
    static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
339
340
    static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
341
342
    static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
343
344
    static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
345
346
};
348
349
/**
350
 * Struct for null-checking: a set of bool flags, all false by default.
351
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * An instance is passed by reference to skeleton::parseStem() as its `seen` argument.
 * NOTE(review): presumably each flag is set to true once the matching macro-prop has been
 * parsed, so that a repeated setting can be detected - confirm against the parser implementation.
352
 */
353
struct SeenMacroProps {
354
    bool notation = false;
355
    bool unit = false;
356
    bool perUnit = false;
357
    bool usage = false;
358
    bool precision = false;
359
    bool roundingMode = false;
360
    bool grouper = false;
361
    bool padder = false;
362
    bool integerWidth = false;
363
    bool symbols = false;
364
    bool unitWidth = false;
365
    bool sign = false;
366
    bool decimal = false;
367
    bool scale = false;
368
};
369
370
namespace {
// SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status)
//
// Calls dest.appendInvariantChars() on the part of src's buffer that begins at offset `start`
// and has length `end - start`, tracking the outcome in a local UErrorCode so that `status`
// is written only on failure:
//   - U_INVARIANT_CONVERSION_ERROR is reported as U_NUMBER_SKELETON_SYNTAX_ERROR;
//   - any other failure code is copied to `status` unchanged.
// Both failure paths execute a bare `return;`, so the macro can only be used inside functions
// that return void.
//
// The expansion begins with a separate `(void)(dest);` statement followed by the block, i.e. it
// is more than one statement; do not use it as the unbraced body of an if/else or loop.
// As a preprocessor macro, the name is not confined to this unnamed namespace.
371
372
0
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
373
0
UPRV_BLOCK_MACRO_BEGIN { \
374
0
    UErrorCode conversionStatus = U_ZERO_ERROR; \
375
0
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
376
0
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
377
0
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
378
0
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
379
0
        return; \
380
0
    } else if (U_FAILURE(conversionStatus)) { \
381
0
        (status) = conversionStatus; \
382
0
        return; \
383
0
    } \
384
0
} UPRV_BLOCK_MACRO_END
385
386
} // namespace
387
388
} // namespace impl
389
} // namespace number
390
U_NAMESPACE_END
391
392
#endif //__SOURCE_NUMBER_SKELETONS_H__
393
#endif /* #if !UCONFIG_NO_FORMATTING */