/src/icu/icu4c/source/i18n/number_skeletons.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2018 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_FORMATTING |
7 | | #ifndef __SOURCE_NUMBER_SKELETONS_H__ |
8 | | #define __SOURCE_NUMBER_SKELETONS_H__ |
9 | | |
10 | | #include "number_types.h" |
11 | | #include "numparse_types.h" |
12 | | #include "unicode/ucharstrie.h" |
13 | | #include "string_segment.h" |
14 | | |
15 | | U_NAMESPACE_BEGIN |
16 | | namespace number::impl { |
17 | | |
18 | | // Forward-declaration |
19 | | struct SeenMacroProps; |
20 | | |
21 | | // namespace for enums and entrypoint functions |
22 | | namespace skeleton { |
23 | | |
24 | | //////////////////////////////////////////////////////////////////////////////////////// |
25 | | // NOTE: For examples of how to add a new stem to the number skeleton parser, see: // |
26 | | // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 // |
27 | | // and // |
28 | | // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 // |
29 | | //////////////////////////////////////////////////////////////////////////////////////// |
30 | | |
31 | | /** |
32 | | * While parsing a skeleton, this enum records what type of option we expect to find next. The enumerators are grouped below by whether an option is not expected (Section 0), accepted but not required (Section 1), or required (Section 2). |
33 | | */ |
34 | | enum ParseState { |
35 | | |
36 | | // Section 0: We expect whitespace or a stem, but not an option: |
37 | | |
38 | | STATE_NULL, |
39 | | |
40 | | // Section 1: We might accept an option, but it is not required: |
41 | | |
42 | | STATE_SCIENTIFIC, |
43 | | STATE_FRACTION_PRECISION, |
44 | | STATE_PRECISION, |
45 | | |
46 | | // Section 2: An option is required: |
47 | | |
48 | | STATE_INCREMENT_PRECISION, |
49 | | STATE_MEASURE_UNIT, |
50 | | STATE_PER_MEASURE_UNIT, |
51 | | STATE_IDENTIFIER_UNIT, |
52 | | STATE_UNIT_USAGE, |
53 | | STATE_CURRENCY_UNIT, |
54 | | STATE_INTEGER_WIDTH, |
55 | | STATE_NUMBERING_SYSTEM, |
56 | | STATE_SCALE, |
57 | | }; |
58 | | |
59 | | /** |
60 | | * All possible stem literals have an entry in the StemEnum. Each enumerator name is the kebab-case stem |
61 | | * string literal written in upper snake case, with a STEM_ prefix. |
62 | | * |
63 | | * @see stem_to_object |
64 | | * @see #SERIALIZED_STEM_TRIE |
65 | | */ |
66 | | enum StemEnum { |
67 | | |
68 | | // Section 1: Stems that do not require an option: |
69 | | |
70 | | STEM_COMPACT_SHORT, |
71 | | STEM_COMPACT_LONG, |
72 | | STEM_SCIENTIFIC, |
73 | | STEM_ENGINEERING, |
74 | | STEM_NOTATION_SIMPLE, |
75 | | STEM_BASE_UNIT, |
76 | | STEM_PERCENT, |
77 | | STEM_PERMILLE, |
78 | | STEM_PERCENT_100, // concise-only |
79 | | STEM_PRECISION_INTEGER, |
80 | | STEM_PRECISION_UNLIMITED, |
81 | | STEM_PRECISION_CURRENCY_STANDARD, |
82 | | STEM_PRECISION_CURRENCY_CASH, |
83 | | STEM_ROUNDING_MODE_CEILING, |
84 | | STEM_ROUNDING_MODE_FLOOR, |
85 | | STEM_ROUNDING_MODE_DOWN, |
86 | | STEM_ROUNDING_MODE_UP, |
87 | | STEM_ROUNDING_MODE_HALF_EVEN, |
88 | | STEM_ROUNDING_MODE_HALF_ODD, |
89 | | STEM_ROUNDING_MODE_HALF_CEILING, |
90 | | STEM_ROUNDING_MODE_HALF_FLOOR, |
91 | | STEM_ROUNDING_MODE_HALF_DOWN, |
92 | | STEM_ROUNDING_MODE_HALF_UP, |
93 | | STEM_ROUNDING_MODE_UNNECESSARY, |
94 | | STEM_INTEGER_WIDTH_TRUNC, |
95 | | STEM_GROUP_OFF, |
96 | | STEM_GROUP_MIN2, |
97 | | STEM_GROUP_AUTO, |
98 | | STEM_GROUP_ON_ALIGNED, |
99 | | STEM_GROUP_THOUSANDS, |
100 | | STEM_LATIN, |
101 | | STEM_UNIT_WIDTH_NARROW, |
102 | | STEM_UNIT_WIDTH_SHORT, |
103 | | STEM_UNIT_WIDTH_FULL_NAME, |
104 | | STEM_UNIT_WIDTH_ISO_CODE, |
105 | | STEM_UNIT_WIDTH_FORMAL, |
106 | | STEM_UNIT_WIDTH_VARIANT, |
107 | | STEM_UNIT_WIDTH_HIDDEN, |
108 | | STEM_SIGN_AUTO, |
109 | | STEM_SIGN_ALWAYS, |
110 | | STEM_SIGN_NEVER, |
111 | | STEM_SIGN_ACCOUNTING, |
112 | | STEM_SIGN_ACCOUNTING_ALWAYS, |
113 | | STEM_SIGN_EXCEPT_ZERO, |
114 | | STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, |
115 | | STEM_SIGN_NEGATIVE, |
116 | | STEM_SIGN_ACCOUNTING_NEGATIVE, |
117 | | STEM_DECIMAL_AUTO, |
118 | | STEM_DECIMAL_ALWAYS, |
119 | | |
120 | | // Section 2: Stems that DO require an option: |
121 | | |
122 | | STEM_PRECISION_INCREMENT, |
123 | | STEM_MEASURE_UNIT, |
124 | | STEM_PER_MEASURE_UNIT, |
125 | | STEM_UNIT, |
126 | | STEM_UNIT_USAGE, |
127 | | STEM_CURRENCY, |
128 | | STEM_INTEGER_WIDTH, |
129 | | STEM_NUMBERING_SYSTEM, |
130 | | STEM_SCALE, |
131 | | }; |
132 | | |
133 | | /** Default wildcard char, accepted on input and printed in output */ |
134 | | constexpr char16_t kWildcardChar = u'*'; |
135 | | |
136 | | /** Alternative wildcard char, accepted on input but not printed in output */ |
137 | | constexpr char16_t kAltWildcardChar = u'+'; |
138 | | |
139 | | /** Checks whether the char is a wildcard on input, i.e. equal to kWildcardChar or kAltWildcardChar */ |
140 | 651 | inline bool isWildcardChar(char16_t c) { |
141 | 651 | return c == kWildcardChar || c == kAltWildcardChar; |
142 | 651 | } |
143 | | |
144 | | /** |
145 | | * Creates an UnlocalizedNumberFormatter corresponding to the given skeleton string. |
146 | | * NOTE(review): perror and status are not documented here; presumably they receive parse-error details and the error code - confirm against the implementation. |
147 | | * @param skeletonString |
148 | | * A number skeleton string, possibly not in its shortest form. |
149 | | * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. |
150 | | */ |
151 | | UnlocalizedNumberFormatter create( |
152 | | const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); |
153 | | |
154 | | /** |
155 | | * Creates a skeleton string corresponding to the given NumberFormatter options. |
156 | | * |
157 | | * @param macros |
158 | | * The NumberFormatter options object (a MacroProps). |
159 | | * @return A skeleton string in normalized form. |
160 | | */ |
161 | | UnicodeString generate(const MacroProps& macros, UErrorCode& status); |
162 | | |
163 | | /** |
164 | | * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. NOTE(review): errOffset is a non-const reference; presumably it receives the offset at which a parse error was detected - confirm against the implementation. |
165 | | * |
166 | | * Internal: use the create() endpoint instead of this function. |
167 | | */ |
168 | | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); |
169 | | |
170 | | /** |
171 | | * Given that the current segment represents a stem, parse it and save the result. The segment and stemTrie are taken by const reference; seen, macros and status are the mutable reference parameters. |
172 | | * |
173 | | * @return The next state after parsing this stem, corresponding to what subset of options to expect. |
174 | | */ |
175 | | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, |
176 | | MacroProps& macros, UErrorCode& status); |
177 | | |
178 | | /** |
179 | | * Given that the current segment represents an option, parse it and save the result. Note that the stem parameter is a ParseState, not a StemEnum. |
180 | | * |
181 | | * @return The next state after parsing this option, corresponding to what subset of options to |
182 | | * expect next. |
183 | | */ |
184 | | ParseState |
185 | | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
186 | | |
187 | | } // namespace skeleton |
188 | | |
189 | | |
190 | | /** |
191 | | * Namespace for utility methods that convert from skeleton::StemEnum to corresponding objects or enums. This |
192 | | * applies only to the "Section 1" stems, those that are well-defined without an option. |
193 | | */ |
194 | | namespace stem_to_object { |
195 | | |
196 | | Notation notation(skeleton::StemEnum stem); |
197 | | |
198 | | MeasureUnit unit(skeleton::StemEnum stem); |
199 | | |
200 | | Precision precision(skeleton::StemEnum stem); |
201 | | |
202 | | UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem); |
203 | | |
204 | | UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem); |
205 | | |
206 | | UNumberUnitWidth unitWidth(skeleton::StemEnum stem); |
207 | | |
208 | | UNumberSignDisplay signDisplay(skeleton::StemEnum stem); |
209 | | |
210 | | UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem); |
211 | | |
212 | | } // namespace stem_to_object |
213 | | |
214 | | /** |
215 | | * Namespace for utility methods that convert from enums to stem strings. More complex object conversions |
216 | | * take place in the object_to_stem_string namespace. NOTE(review): no namespace of that name is declared in this header; possibly blueprint_helpers or GeneratorHelpers is meant - confirm. |
217 | | */ |
218 | | namespace enum_to_stem_string { |
219 | | |
220 | | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); |
221 | | |
222 | | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); |
223 | | |
224 | | void unitWidth(UNumberUnitWidth value, UnicodeString& sb); |
225 | | |
226 | | void signDisplay(UNumberSignDisplay value, UnicodeString& sb); |
227 | | |
228 | | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); |
229 | | |
230 | | } // namespace enum_to_stem_string |
231 | | |
232 | | /** |
233 | | * Namespace for utility methods for processing stems and options that cannot be interpreted literally. The parse* functions take a const StringSegment plus a mutable MacroProps; the generate* functions take a mutable UnicodeString (sb). All of them take a UErrorCode& status. |
234 | | */ |
235 | | namespace blueprint_helpers { |
236 | | |
237 | | /** @return Whether we successfully found and parsed an exponent width option. */ |
238 | | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
239 | | |
240 | | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); |
241 | | |
242 | | /** @return Whether we successfully found and parsed an exponent sign option. */ |
243 | | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
244 | | |
245 | | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
246 | | |
247 | | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); |
248 | | |
249 | | // "measure-unit/" is deprecated in favour of "unit/". |
250 | | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
251 | | |
252 | | // "per-measure-unit/" is deprecated in favour of "unit/". |
253 | | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
254 | | |
255 | | /** |
256 | | * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as |
257 | | * specified via a "unit/" concise skeleton. |
258 | | */ |
259 | | void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
260 | | |
261 | | void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
262 | | |
263 | | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
264 | | |
265 | | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); |
266 | | |
267 | | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
268 | | |
269 | | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); |
270 | | |
271 | | void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
272 | | |
273 | | // Note: no generateScientificStem since this syntax was added later in ICU 67 |
274 | | |
275 | | void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
276 | | |
277 | | // Note: no generateIntegerStem since this syntax was added later in ICU 67 |
278 | | |
279 | | /** @return Whether we successfully found and parsed a frac-sig option. */ |
280 | | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
281 | | |
282 | | /** @return Whether we successfully found and parsed a trailing zero option. */ |
283 | | bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
284 | | |
285 | | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
286 | | |
287 | | void |
288 | | generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status); |
289 | | |
290 | | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
291 | | |
292 | | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); |
293 | | |
294 | | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
295 | | |
296 | | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); |
297 | | |
298 | | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
299 | | |
300 | | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, |
301 | | UErrorCode& status); |
302 | | |
303 | | } // namespace blueprint_helpers |
304 | | |
305 | | /** |
306 | | * Class for utility methods for generating a token corresponding to each macro-prop. Each private |
307 | | * bool method returns whether or not a token was written to the string builder (the UnicodeString sb). |
308 | | * |
309 | | * This needs to be a class, not a namespace, so it can be friended. |
310 | | */ |
311 | | class GeneratorHelpers { |
312 | | public: |
313 | | /** |
314 | | * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given |
315 | | * UnicodeString (sb). |
316 | | * |
317 | | * Internal: use the skeleton::generate() endpoint instead of this function. |
318 | | */ |
319 | | static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
320 | | |
321 | | private: |
322 | | static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
323 | | |
324 | | static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
325 | | |
326 | | static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
327 | | |
328 | | static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
329 | | |
330 | | static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
331 | | |
332 | | static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
333 | | |
334 | | static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
335 | | |
336 | | static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
337 | | |
338 | | static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
339 | | |
340 | | static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
341 | | |
342 | | static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
343 | | |
344 | | static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
345 | | |
346 | | }; |
347 | | |
348 | | /** |
349 | | * Struct for null-checking. Holds one bool flag per listed property, each initialized to false; it is passed by mutable reference to skeleton::parseStem. |
350 | | * In Java, we can just check the object reference. In C++, we need a different method. |
351 | | */ |
352 | | struct SeenMacroProps { |
353 | | bool notation = false; |
354 | | bool unit = false; |
355 | | bool perUnit = false; |
356 | | bool usage = false; |
357 | | bool precision = false; |
358 | | bool roundingMode = false; |
359 | | bool grouper = false; |
360 | | bool padder = false; |
361 | | bool integerWidth = false; |
362 | | bool symbols = false; |
363 | | bool unitWidth = false; |
364 | | bool sign = false; |
365 | | bool decimal = false; |
366 | | bool scale = false; |
367 | | }; |
368 | | |
369 | | namespace { /* SKELETON_UCHAR_TO_CHAR calls dest.appendInvariantChars on the (end - start) code units of src beginning at index start. If that reports U_INVARIANT_CONVERSION_ERROR, status is set to U_NUMBER_SKELETON_SYNTAX_ERROR; on any other failure the conversion status is copied into status. Both error paths execute a bare return statement, so the macro can only be expanded inside functions returning void. NOTE(review): preprocessor macros are not scoped by namespaces, so this unnamed namespace does not limit the macro's visibility. */ |
370 | | |
371 | 1.57k | #define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ |
372 | 1.57k | UPRV_BLOCK_MACRO_BEGIN { \ |
373 | 1.57k | UErrorCode conversionStatus = U_ZERO_ERROR; \ |
374 | 1.57k | (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ |
375 | 1.57k | if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ |
376 | 20 | /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ |
377 | 20 | (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ |
378 | 20 | return; \ |
379 | 1.55k | } else if (U_FAILURE(conversionStatus)) { \ |
380 | 0 | (status) = conversionStatus; \ |
381 | 0 | return; \ |
382 | 0 | } \ |
383 | 1.57k | } UPRV_BLOCK_MACRO_END |
384 | | |
385 | | } // namespace |
386 | | |
387 | | } // namespace number::impl |
388 | | U_NAMESPACE_END |
389 | | |
390 | | #endif //__SOURCE_NUMBER_SKELETONS_H__ |
391 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |