/src/icu/source/i18n/number_skeletons.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2018 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | #include "unicode/utypes.h"  | 
5  |  |  | 
6  |  | #if !UCONFIG_NO_FORMATTING  | 
7  |  | #ifndef __SOURCE_NUMBER_SKELETONS_H__  | 
8  |  | #define __SOURCE_NUMBER_SKELETONS_H__  | 
9  |  |  | 
10  |  | #include "number_types.h"  | 
11  |  | #include "numparse_types.h"  | 
12  |  | #include "unicode/ucharstrie.h"  | 
13  |  | #include "string_segment.h"  | 
14  |  |  | 
15  |  | U_NAMESPACE_BEGIN  | 
16  |  | namespace number { | 
17  |  | namespace impl { | 
18  |  |  | 
19  |  | // Forward-declaration  | 
20  |  | struct SeenMacroProps;  | 
21  |  |  | 
22  |  | // namespace for enums and entrypoint functions  | 
23  |  | namespace skeleton { | 
24  |  |  | 
25  |  | ////////////////////////////////////////////////////////////////////////////////////////  | 
26  |  | // NOTE: For examples of how to add a new stem to the number skeleton parser, see:    //  | 
27  |  | // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //  | 
28  |  | // and                                                                                //  | 
29  |  | // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //  | 
30  |  | ////////////////////////////////////////////////////////////////////////////////////////  | 
31  |  |  | 
/**
 * Parser state used while walking a skeleton string: it records which kind of option, if any,
 * is expected to come next.
 *
 * The enumerators fall into three sections depending on whether an option is not accepted,
 * optional, or mandatory. Numeric values are assigned implicitly in declaration order.
 */
enum ParseState {

    // ---- Section 0: whitespace or a stem is expected; an option is not accepted ----

    STATE_NULL = 0,

    // ---- Section 1: an option may follow, but it is not required ----

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,
    STATE_PRECISION,

    // ---- Section 2: an option must follow ----

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_UNIT_USAGE,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
59  |  |  | 
/**
 * One enumerator per stem literal understood by the skeleton parser. Each name is the stem's
 * kebab-case string literal rewritten in upper snake case with a STEM_ prefix.
 *
 * Numeric values are assigned implicitly in declaration order, and the enum is split into
 * stems that stand alone (Section 1) and stems that must be followed by an option (Section 2).
 * NOTE(review): code elsewhere may depend on this ordering -- keep new stems inside the
 * section they belong to.
 *
 * @see stem_to_object (conversions from Section 1 stems to objects/enums)
 * @see SERIALIZED_STEM_TRIE (not declared in this header)
 */
enum StemEnum {

    // ---- Section 1: stems that do not require an option ----

    // Notation
    STEM_COMPACT_SHORT = 0,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,

    // Unit shortcuts
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only

    // Precision
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,

    // Rounding mode
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_ODD,
    STEM_ROUNDING_MODE_HALF_CEILING,
    STEM_ROUNDING_MODE_HALF_FLOOR,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,

    // Integer width
    STEM_INTEGER_WIDTH_TRUNC,

    // Grouping
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,

    // Symbols
    STEM_LATIN,

    // Unit width
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_FORMAL,
    STEM_UNIT_WIDTH_VARIANT,
    STEM_UNIT_WIDTH_HIDDEN,

    // Sign display
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_SIGN_NEGATIVE,
    STEM_SIGN_ACCOUNTING_NEGATIVE,

    // Decimal separator display
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // ---- Section 2: stems that DO require an option ----

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_UNIT_USAGE,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
133  |  |  | 
/** Primary wildcard character: accepted on input and the one printed in output. */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard character: accepted on input but never printed in output. */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Tells whether a code unit counts as a wildcard when reading a skeleton.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c is either kWildcardChar or kAltWildcardChar; false otherwise.
 */
inline bool isWildcardChar(char16_t c) {
    switch (c) {
        case kWildcardChar:
        case kAltWildcardChar:
            return true;
        default:
            return false;
    }
}
144  |  |  | 
145  |  | /**  | 
146  |  |  * Creates a NumberFormatter corresponding to the given skeleton string.  | 
147  |  |  *  | 
148  |  |  * @param skeletonString  | 
149  |  |  *            A number skeleton string, possibly not in its shortest form.  | 
150  |  |  * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.  | 
151  |  |  */  | 
152  |  | UnlocalizedNumberFormatter create(  | 
153  |  |     const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);  | 
154  |  |  | 
155  |  | /**  | 
156  |  |  * Create a skeleton string corresponding to the given NumberFormatter.  | 
157  |  |  *  | 
158  |  |  * @param macros  | 
159  |  |  *            The NumberFormatter options object.  | 
160  |  |  * @return A skeleton string in normalized form.  | 
161  |  |  */  | 
162  |  | UnicodeString generate(const MacroProps& macros, UErrorCode& status);  | 
163  |  |  | 
164  |  | /**  | 
165  |  |  * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.  | 
166  |  |  *  | 
167  |  |  * Internal: use the create() endpoint instead of this function.  | 
168  |  |  */  | 
169  |  | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);  | 
170  |  |  | 
171  |  | /**  | 
172  |  |  * Given that the current segment represents a stem, parse it and save the result.  | 
173  |  |  *  | 
174  |  |  * @return The next state after parsing this stem, corresponding to what subset of options to expect.  | 
175  |  |  */  | 
176  |  | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,  | 
177  |  |                      MacroProps& macros, UErrorCode& status);  | 
178  |  |  | 
179  |  | /**  | 
180  |  |  * Given that the current segment represents an option, parse it and save the result.  | 
181  |  |  *  | 
182  |  |  * @return The next state after parsing this option, corresponding to what subset of options to  | 
183  |  |  *         expect next.  | 
184  |  |  */  | 
185  |  | ParseState  | 
186  |  | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
187  |  |  | 
188  |  | } // namespace skeleton  | 
189  |  |  | 
190  |  |  | 
191  |  | /**  | 
192  |  |  * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This  | 
193  |  |  * applies to only the "Section 1" stems, those that are well-defined without an option.  | 
194  |  |  */  | 
195  |  | namespace stem_to_object { | 
196  |  |  | 
197  |  | Notation notation(skeleton::StemEnum stem);  | 
198  |  |  | 
199  |  | MeasureUnit unit(skeleton::StemEnum stem);  | 
200  |  |  | 
201  |  | Precision precision(skeleton::StemEnum stem);  | 
202  |  |  | 
203  |  | UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);  | 
204  |  |  | 
205  |  | UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);  | 
206  |  |  | 
207  |  | UNumberUnitWidth unitWidth(skeleton::StemEnum stem);  | 
208  |  |  | 
209  |  | UNumberSignDisplay signDisplay(skeleton::StemEnum stem);  | 
210  |  |  | 
211  |  | UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);  | 
212  |  |  | 
213  |  | } // namespace stem_to_object  | 
214  |  |  | 
215  |  | /**  | 
216  |  |  * Namespace for utility methods that convert from enums to stem strings. More complex object conversions  | 
217  |  |  * take place in the object_to_stem_string namespace.  | 
218  |  |  */  | 
219  |  | namespace enum_to_stem_string { | 
220  |  |  | 
221  |  | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);  | 
222  |  |  | 
223  |  | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);  | 
224  |  |  | 
225  |  | void unitWidth(UNumberUnitWidth value, UnicodeString& sb);  | 
226  |  |  | 
227  |  | void signDisplay(UNumberSignDisplay value, UnicodeString& sb);  | 
228  |  |  | 
229  |  | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);  | 
230  |  |  | 
231  |  | } // namespace enum_to_stem_string  | 
232  |  |  | 
233  |  | /**  | 
234  |  |  * Namespace for utility methods for processing stems and options that cannot be interpreted literally.  | 
235  |  |  */  | 
236  |  | namespace blueprint_helpers { | 
237  |  |  | 
238  |  | /** @return Whether we successfully found and parsed an exponent width option. */  | 
239  |  | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
240  |  |  | 
241  |  | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);  | 
242  |  |  | 
243  |  | /** @return Whether we successfully found and parsed an exponent sign option. */  | 
244  |  | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
245  |  |  | 
246  |  | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
247  |  |  | 
248  |  | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);  | 
249  |  |  | 
250  |  | // "measure-unit/" is deprecated in favour of "unit/".  | 
251  |  | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
252  |  |  | 
253  |  | // "per-measure-unit/" is deprecated in favour of "unit/".  | 
254  |  | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
255  |  |  | 
256  |  | /**  | 
257  |  |  * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as  | 
258  |  |  * specified via a "unit/" concise skeleton.  | 
259  |  |  */  | 
260  |  | void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
261  |  |  | 
262  |  | void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
263  |  |  | 
264  |  | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
265  |  |  | 
266  |  | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);  | 
267  |  |  | 
268  |  | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
269  |  |  | 
270  |  | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);  | 
271  |  |  | 
272  |  | void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
273  |  |  | 
274  |  | // Note: no generateScientificStem since this syntax was added later in ICU 67  | 
275  |  |  | 
276  |  | void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
277  |  |  | 
278  |  | // Note: no generateIntegerStem since this syntax was added later in ICU 67  | 
279  |  |  | 
280  |  | /** @return Whether we successfully found and parsed a frac-sig option. */  | 
281  |  | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
282  |  |  | 
283  |  | /** @return Whether we successfully found and parsed a trailing zero option. */  | 
284  |  | bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
285  |  |  | 
286  |  | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
287  |  |  | 
288  |  | void  | 
289  |  | generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);  | 
290  |  |  | 
291  |  | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
292  |  |  | 
293  |  | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);  | 
294  |  |  | 
295  |  | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
296  |  |  | 
297  |  | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);  | 
298  |  |  | 
299  |  | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);  | 
300  |  |  | 
301  |  | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,  | 
302  |  |                               UErrorCode& status);  | 
303  |  |  | 
304  |  | } // namespace blueprint_helpers  | 
305  |  |  | 
306  |  | /**  | 
307  |  |  * Class for utility methods for generating a token corresponding to each macro-prop. Each method  | 
308  |  |  * returns whether or not a token was written to the string builder.  | 
309  |  |  *  | 
310  |  |  * This needs to be a class, not a namespace, so it can be friended.  | 
311  |  |  */  | 
312  |  | class GeneratorHelpers { | 
313  |  |   public:  | 
314  |  |     /**  | 
315  |  |      * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given  | 
316  |  |      * StringBuilder.  | 
317  |  |      *  | 
318  |  |      * Internal: use the create() endpoint instead of this function.  | 
319  |  |      */  | 
320  |  |     static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
321  |  |  | 
322  |  |   private:  | 
323  |  |     static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
324  |  |  | 
325  |  |     static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
326  |  |  | 
327  |  |     static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
328  |  |  | 
329  |  |     static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
330  |  |  | 
331  |  |     static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
332  |  |  | 
333  |  |     static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
334  |  |  | 
335  |  |     static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
336  |  |  | 
337  |  |     static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
338  |  |  | 
339  |  |     static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
340  |  |  | 
341  |  |     static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
342  |  |  | 
343  |  |     static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
344  |  |  | 
345  |  |     static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);  | 
346  |  |  | 
347  |  | };  | 
348  |  |  | 
/**
 * Set of "has this been seen?" flags, one per macro-prop, all starting out false.
 *
 * In Java a null object reference can serve as the "not set" marker; C++ has no equivalent for
 * these value types, so the information is tracked in this separate struct.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool usage{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
369  |  |  | 
// NOTE: this macro used to be wrapped in an anonymous namespace. Namespaces have no effect on
// preprocessor macros, and anonymous namespaces do not belong in headers, so the wrapper was
// dropped.

/**
 * Appends the UTF-16 code units src[start, end) to `dest` as invariant characters.
 *
 * @param dest    Object providing appendInvariantChars(); receives the converted characters.
 * @param src     Object providing getBuffer(); the source of the UTF-16 code units.
 * @param start   Index of the first code unit to convert.
 * @param end     Index one past the last code unit to convert.
 * @param status  UErrorCode lvalue that receives the failure code, if any.
 *
 * Control flow: on any conversion failure the macro sets `status` and executes `return;` in
 * the CALLING function, so it can only be used inside functions returning void. An invariant
 * conversion error is reported as U_NUMBER_SKELETON_SYNTAX_ERROR; any other failure is passed
 * through unchanged.
 *
 * The whole expansion is a single statement wrapped in UPRV_BLOCK_MACRO_BEGIN/END, so the macro
 * is safe to use as the body of an unbraced if/else. (Previously `(void)(dest);` was emitted as
 * a separate statement ahead of the block, which made the expansion two statements.)
 */
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) \
UPRV_BLOCK_MACRO_BEGIN { \
    (void)(dest); \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END
387  |  |  | 
388  |  | } // namespace impl  | 
389  |  | } // namespace number  | 
390  |  | U_NAMESPACE_END  | 
391  |  |  | 
392  |  | #endif //__SOURCE_NUMBER_SKELETONS_H__  | 
393  |  | #endif /* #if !UCONFIG_NO_FORMATTING */  |