/src/icu/icu4c/source/i18n/number_patternstring.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2017 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_FORMATTING |
7 | | |
8 | | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
9 | | // Helpful in toString methods and elsewhere. |
10 | | #define UNISTR_FROM_STRING_EXPLICIT |
11 | | #define UNISTR_FROM_CHAR_EXPLICIT |
12 | | |
13 | | #include "uassert.h" |
14 | | #include "number_patternstring.h" |
15 | | #include "unicode/utf16.h" |
16 | | #include "number_utils.h" |
17 | | #include "number_roundingutils.h" |
18 | | #include "number_mapper.h" |
19 | | |
20 | | using namespace icu; |
21 | | using namespace icu::number; |
22 | | using namespace icu::number::impl; |
23 | | |
24 | | |
25 | | void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, |
26 | 254k | UErrorCode& status) { |
27 | 254k | patternInfo.consumePattern(patternString, status); |
28 | 254k | } |
29 | | |
30 | | DecimalFormatProperties |
31 | | PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, |
32 | 0 | UErrorCode& status) { |
33 | 0 | DecimalFormatProperties properties; |
34 | 0 | parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
35 | 0 | return properties; |
36 | 0 | } |
37 | | |
38 | | DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, |
39 | 0 | UErrorCode& status) { |
40 | 0 | return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); |
41 | 0 | } |
42 | | |
43 | | void |
44 | | PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, |
45 | 63.9k | IgnoreRounding ignoreRounding, UErrorCode& status) { |
46 | 63.9k | parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
47 | 63.9k | } |
48 | | |
49 | | |
50 | 7.47k | char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { |
51 | 7.47k | const Endpoints& endpoints = getEndpoints(flags); |
52 | 7.47k | if (index < 0 || index >= endpoints.end - endpoints.start) { |
53 | 0 | UPRV_UNREACHABLE_EXIT; |
54 | 0 | } |
55 | 7.47k | return pattern.charAt(endpoints.start + index); |
56 | 7.47k | } |
57 | | |
58 | 1.44M | int32_t ParsedPatternInfo::length(int32_t flags) const { |
59 | 1.44M | return getLengthFromEndpoints(getEndpoints(flags)); |
60 | 1.44M | } |
61 | | |
62 | 1.44M | int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { |
63 | 1.44M | return endpoints.end - endpoints.start; |
64 | 1.44M | } |
65 | | |
66 | 127k | UnicodeString ParsedPatternInfo::getString(int32_t flags) const { |
67 | 127k | const Endpoints& endpoints = getEndpoints(flags); |
68 | 127k | if (endpoints.start == endpoints.end) { |
69 | 127k | return UnicodeString(); |
70 | 127k | } |
71 | | // Create a new UnicodeString |
72 | 0 | return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); |
73 | 127k | } |
74 | | |
75 | 1.58M | const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { |
76 | 1.58M | bool prefix = (flags & AFFIX_PREFIX) != 0; |
77 | 1.58M | bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; |
78 | 1.58M | bool padding = (flags & AFFIX_PADDING) != 0; |
79 | 1.58M | if (isNegative && padding) { |
80 | 0 | return negative.paddingEndpoints; |
81 | 1.58M | } else if (padding) { |
82 | 0 | return positive.paddingEndpoints; |
83 | 1.58M | } else if (prefix && isNegative) { |
84 | 95 | return negative.prefixEndpoints; |
85 | 1.58M | } else if (prefix) { |
86 | 787k | return positive.prefixEndpoints; |
87 | 794k | } else if (isNegative) { |
88 | 27 | return negative.suffixEndpoints; |
89 | 794k | } else { |
90 | 794k | return positive.suffixEndpoints; |
91 | 794k | } |
92 | 1.58M | } |
93 | | |
94 | 358k | bool ParsedPatternInfo::positiveHasPlusSign() const { |
95 | 358k | return positive.hasPlusSign; |
96 | 358k | } |
97 | | |
98 | 1.44M | bool ParsedPatternInfo::hasNegativeSubpattern() const { |
99 | 1.44M | return fHasNegativeSubpattern; |
100 | 1.44M | } |
101 | | |
102 | 28 | bool ParsedPatternInfo::negativeHasMinusSign() const { |
103 | 28 | return negative.hasMinusSign; |
104 | 28 | } |
105 | | |
106 | 716k | bool ParsedPatternInfo::hasCurrencySign() const { |
107 | 716k | return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); |
108 | 716k | } |
109 | | |
110 | 12.0k | bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { |
111 | 12.0k | return AffixUtils::containsType(pattern, type, status); |
112 | 12.0k | } |
113 | | |
114 | 723k | bool ParsedPatternInfo::hasBody() const { |
115 | 723k | return positive.integerTotal > 0; |
116 | 723k | } |
117 | | |
118 | 7.00k | bool ParsedPatternInfo::currencyAsDecimal() const { |
119 | 7.00k | return positive.hasCurrencyDecimal; |
120 | 7.00k | } |
121 | | |
122 | | ///////////////////////////////////////////////////// |
123 | | /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
124 | | ///////////////////////////////////////////////////// |
125 | | |
126 | 7.33M | UChar32 ParsedPatternInfo::ParserState::peek() { |
127 | 7.33M | if (offset == pattern.length()) { |
128 | 1.83M | return -1; |
129 | 5.49M | } else { |
130 | 5.49M | return pattern.char32At(offset); |
131 | 5.49M | } |
132 | 7.33M | } |
133 | | |
134 | 0 | UChar32 ParsedPatternInfo::ParserState::peek2() { |
135 | 0 | if (offset == pattern.length()) { |
136 | 0 | return -1; |
137 | 0 | } |
138 | 0 | int32_t cp1 = pattern.char32At(offset); |
139 | 0 | int32_t offset2 = offset + U16_LENGTH(cp1); |
140 | 0 | if (offset2 == pattern.length()) { |
141 | 0 | return -1; |
142 | 0 | } |
143 | 0 | return pattern.char32At(offset2); |
144 | 0 | } |
145 | | |
146 | 2.09M | UChar32 ParsedPatternInfo::ParserState::next() { |
147 | 2.09M | int32_t codePoint = peek(); |
148 | 2.09M | offset += U16_LENGTH(codePoint); |
149 | 2.09M | return codePoint; |
150 | 2.09M | } |
151 | | |
152 | 254k | void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { |
153 | 254k | if (U_FAILURE(status)) { return; } |
154 | 254k | this->pattern = patternString; |
155 | | |
156 | | // This class is not intended for writing twice! |
157 | | // Use move assignment to overwrite instead. |
158 | 254k | U_ASSERT(state.offset == 0); |
159 | | |
160 | | // pattern := subpattern (';' subpattern)? |
161 | 254k | currentSubpattern = &positive; |
162 | 254k | consumeSubpattern(status); |
163 | 254k | if (U_FAILURE(status)) { return; } |
164 | 254k | if (state.peek() == u';') { |
165 | 31 | state.next(); // consume the ';' |
166 | | // Don't consume the negative subpattern if it is empty (trailing ';') |
167 | 31 | if (state.peek() != -1) { |
168 | 31 | fHasNegativeSubpattern = true; |
169 | 31 | currentSubpattern = &negative; |
170 | 31 | consumeSubpattern(status); |
171 | 31 | if (U_FAILURE(status)) { return; } |
172 | 31 | } |
173 | 31 | } |
174 | 254k | if (state.peek() != -1) { |
175 | 0 | state.toParseException(u"Found unquoted special character"); |
176 | 0 | status = U_UNQUOTED_SPECIAL; |
177 | 0 | } |
178 | 254k | } |
179 | | |
180 | 254k | void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { |
181 | | // subpattern := literals? number exponent? literals? |
182 | 254k | consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); |
183 | 254k | if (U_FAILURE(status)) { return; } |
184 | 254k | consumeAffix(currentSubpattern->prefixEndpoints, status); |
185 | 254k | if (U_FAILURE(status)) { return; } |
186 | 254k | consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); |
187 | 254k | if (U_FAILURE(status)) { return; } |
188 | 254k | consumeFormat(status); |
189 | 254k | if (U_FAILURE(status)) { return; } |
190 | 254k | consumeExponent(status); |
191 | 254k | if (U_FAILURE(status)) { return; } |
192 | 254k | consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); |
193 | 254k | if (U_FAILURE(status)) { return; } |
194 | 254k | consumeAffix(currentSubpattern->suffixEndpoints, status); |
195 | 254k | if (U_FAILURE(status)) { return; } |
196 | 254k | consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); |
197 | 254k | if (U_FAILURE(status)) { return; } |
198 | 254k | } |
199 | | |
200 | 1.01M | void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { |
201 | 1.01M | if (state.peek() != u'*') { |
202 | 1.01M | return; |
203 | 1.01M | } |
204 | 0 | if (currentSubpattern->hasPadding) { |
205 | 0 | state.toParseException(u"Cannot have multiple pad specifiers"); |
206 | 0 | status = U_MULTIPLE_PAD_SPECIFIERS; |
207 | 0 | return; |
208 | 0 | } |
209 | 0 | currentSubpattern->paddingLocation = paddingLocation; |
210 | 0 | currentSubpattern->hasPadding = true; |
211 | 0 | state.next(); // consume the '*' |
212 | 0 | currentSubpattern->paddingEndpoints.start = state.offset; |
213 | 0 | consumeLiteral(status); |
214 | 0 | currentSubpattern->paddingEndpoints.end = state.offset; |
215 | 0 | } |
216 | | |
217 | 508k | void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { |
218 | | // literals := { literal } |
219 | 508k | endpoints.start = state.offset; |
220 | 514k | while (true) { |
221 | 514k | switch (state.peek()) { |
222 | 180k | case u'#': |
223 | 180k | case u'@': |
224 | 180k | case u';': |
225 | 180k | case u'*': |
226 | 180k | case u'.': |
227 | 180k | case u',': |
228 | 254k | case u'0': |
229 | 254k | case u'1': |
230 | 254k | case u'2': |
231 | 254k | case u'3': |
232 | 254k | case u'4': |
233 | 254k | case u'5': |
234 | 254k | case u'6': |
235 | 254k | case u'7': |
236 | 254k | case u'8': |
237 | 254k | case u'9': |
238 | 508k | case -1: |
239 | | // Characters that cannot appear unquoted in a literal |
240 | | // break outer; |
241 | 508k | goto after_outer; |
242 | | |
243 | 56 | case u'%': |
244 | 56 | currentSubpattern->hasPercentSign = true; |
245 | 56 | break; |
246 | | |
247 | 0 | case u'‰': |
248 | 0 | currentSubpattern->hasPerMilleSign = true; |
249 | 0 | break; |
250 | | |
251 | 0 | case u'¤': |
252 | 0 | currentSubpattern->hasCurrencySign = true; |
253 | 0 | break; |
254 | | |
255 | 40 | case u'-': |
256 | 40 | currentSubpattern->hasMinusSign = true; |
257 | 40 | break; |
258 | | |
259 | 0 | case u'+': |
260 | 0 | currentSubpattern->hasPlusSign = true; |
261 | 0 | break; |
262 | | |
263 | 6.55k | default: |
264 | 6.55k | break; |
265 | 514k | } |
266 | 6.64k | consumeLiteral(status); |
267 | 6.64k | if (U_FAILURE(status)) { return; } |
268 | 6.64k | } |
269 | 508k | after_outer: |
270 | 508k | endpoints.end = state.offset; |
271 | 508k | } |
272 | | |
273 | 6.64k | void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { |
274 | 6.64k | if (state.peek() == -1) { |
275 | 0 | state.toParseException(u"Expected unquoted literal but found EOL"); |
276 | 0 | status = U_PATTERN_SYNTAX_ERROR; |
277 | 0 | return; |
278 | 6.64k | } else if (state.peek() == u'\'') { |
279 | 441 | state.next(); // consume the starting quote |
280 | 882 | while (state.peek() != u'\'') { |
281 | 441 | if (state.peek() == -1) { |
282 | 0 | state.toParseException(u"Expected quoted literal but found EOL"); |
283 | 0 | status = U_PATTERN_SYNTAX_ERROR; |
284 | 0 | return; |
285 | 441 | } else { |
286 | 441 | state.next(); // consume a quoted character |
287 | 441 | } |
288 | 441 | } |
289 | 441 | state.next(); // consume the ending quote |
290 | 6.20k | } else { |
291 | | // consume a non-quoted literal character |
292 | 6.20k | state.next(); |
293 | 6.20k | } |
294 | 6.64k | } |
295 | | |
296 | 254k | void ParsedPatternInfo::consumeFormat(UErrorCode& status) { |
297 | 254k | consumeIntegerFormat(status); |
298 | 254k | if (U_FAILURE(status)) { return; } |
299 | 254k | if (state.peek() == u'.') { |
300 | 222k | state.next(); // consume the decimal point |
301 | 222k | currentSubpattern->hasDecimal = true; |
302 | 222k | currentSubpattern->widthExceptAffixes += 1; |
303 | 222k | consumeFractionFormat(status); |
304 | 222k | if (U_FAILURE(status)) { return; } |
305 | 222k | } else if (state.peek() == u'¤') { |
306 | | // Check if currency is a decimal separator |
307 | 0 | switch (state.peek2()) { |
308 | 0 | case u'#': |
309 | 0 | case u'0': |
310 | 0 | case u'1': |
311 | 0 | case u'2': |
312 | 0 | case u'3': |
313 | 0 | case u'4': |
314 | 0 | case u'5': |
315 | 0 | case u'6': |
316 | 0 | case u'7': |
317 | 0 | case u'8': |
318 | 0 | case u'9': |
319 | 0 | break; |
320 | 0 | default: |
321 | | // Currency symbol followed by a non-numeric character; |
322 | | // treat as a normal affix. |
323 | 0 | return; |
324 | 0 | } |
325 | | // Currency symbol is followed by a numeric character; |
326 | | // treat as a decimal separator. |
327 | 0 | currentSubpattern->hasCurrencySign = true; |
328 | 0 | currentSubpattern->hasCurrencyDecimal = true; |
329 | 0 | currentSubpattern->hasDecimal = true; |
330 | 0 | currentSubpattern->widthExceptAffixes += 1; |
331 | 0 | state.next(); // consume the symbol |
332 | 0 | consumeFractionFormat(status); |
333 | 0 | if (U_FAILURE(status)) { return; } |
334 | 0 | } |
335 | 254k | } |
336 | | |
337 | 254k | void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { |
338 | | // Convenience reference: |
339 | 254k | ParsedSubpatternInfo& result = *currentSubpattern; |
340 | | |
341 | 1.25M | while (true) { |
342 | 1.25M | switch (state.peek()) { |
343 | 189k | case u',': |
344 | 189k | result.widthExceptAffixes += 1; |
345 | 189k | result.groupingSizes <<= 16; |
346 | 189k | break; |
347 | | |
348 | 558k | case u'#': |
349 | 558k | if (result.integerNumerals > 0) { |
350 | 0 | state.toParseException(u"# cannot follow 0 before decimal point"); |
351 | 0 | status = U_UNEXPECTED_TOKEN; |
352 | 0 | return; |
353 | 0 | } |
354 | 558k | result.widthExceptAffixes += 1; |
355 | 558k | result.groupingSizes += 1; |
356 | 558k | if (result.integerAtSigns > 0) { |
357 | 0 | result.integerTrailingHashSigns += 1; |
358 | 558k | } else { |
359 | 558k | result.integerLeadingHashSigns += 1; |
360 | 558k | } |
361 | 558k | result.integerTotal += 1; |
362 | 558k | break; |
363 | | |
364 | 0 | case u'@': |
365 | 0 | if (result.integerNumerals > 0) { |
366 | 0 | state.toParseException(u"Cannot mix 0 and @"); |
367 | 0 | status = U_UNEXPECTED_TOKEN; |
368 | 0 | return; |
369 | 0 | } |
370 | 0 | if (result.integerTrailingHashSigns > 0) { |
371 | 0 | state.toParseException(u"Cannot nest # inside of a run of @"); |
372 | 0 | status = U_UNEXPECTED_TOKEN; |
373 | 0 | return; |
374 | 0 | } |
375 | 0 | result.widthExceptAffixes += 1; |
376 | 0 | result.groupingSizes += 1; |
377 | 0 | result.integerAtSigns += 1; |
378 | 0 | result.integerTotal += 1; |
379 | 0 | break; |
380 | | |
381 | 257k | case u'0': |
382 | 257k | case u'1': |
383 | 257k | case u'2': |
384 | 257k | case u'3': |
385 | 257k | case u'4': |
386 | 257k | case u'5': |
387 | 257k | case u'6': |
388 | 257k | case u'7': |
389 | 257k | case u'8': |
390 | 257k | case u'9': |
391 | 257k | if (result.integerAtSigns > 0) { |
392 | 0 | state.toParseException(u"Cannot mix @ and 0"); |
393 | 0 | status = U_UNEXPECTED_TOKEN; |
394 | 0 | return; |
395 | 0 | } |
396 | 257k | result.widthExceptAffixes += 1; |
397 | 257k | result.groupingSizes += 1; |
398 | 257k | result.integerNumerals += 1; |
399 | 257k | result.integerTotal += 1; |
400 | 257k | if (!result.rounding.isZeroish() || state.peek() != u'0') { |
401 | 0 | result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true); |
402 | 0 | } |
403 | 257k | break; |
404 | | |
405 | 254k | default: |
406 | 254k | goto after_outer; |
407 | 1.25M | } |
408 | 1.00M | state.next(); // consume the symbol |
409 | 1.00M | } |
410 | | |
411 | 254k | after_outer: |
412 | | // Disallow patterns with a trailing ',' or with two ',' next to each other |
413 | 254k | auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff); |
414 | 254k | auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff); |
415 | 254k | auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff); |
416 | 254k | if (grouping1 == 0 && grouping2 != -1) { |
417 | 0 | state.toParseException(u"Trailing grouping separator is invalid"); |
418 | 0 | status = U_UNEXPECTED_TOKEN; |
419 | 0 | return; |
420 | 0 | } |
421 | 254k | if (grouping2 == 0 && grouping3 != -1) { |
422 | 0 | state.toParseException(u"Grouping width of zero is invalid"); |
423 | 0 | status = U_PATTERN_SYNTAX_ERROR; |
424 | 0 | return; |
425 | 0 | } |
426 | 254k | } |
427 | | |
428 | 222k | void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { |
429 | | // Convenience reference: |
430 | 222k | ParsedSubpatternInfo& result = *currentSubpattern; |
431 | | |
432 | 222k | int32_t zeroCounter = 0; |
433 | 1.08M | while (true) { |
434 | 1.08M | switch (state.peek()) { |
435 | 824k | case u'#': |
436 | 824k | result.widthExceptAffixes += 1; |
437 | 824k | result.fractionHashSigns += 1; |
438 | 824k | result.fractionTotal += 1; |
439 | 824k | zeroCounter++; |
440 | 824k | break; |
441 | | |
442 | 38.5k | case u'0': |
443 | 38.5k | case u'1': |
444 | 38.5k | case u'2': |
445 | 38.5k | case u'3': |
446 | 38.5k | case u'4': |
447 | 38.5k | case u'5': |
448 | 38.5k | case u'6': |
449 | 38.5k | case u'7': |
450 | 38.5k | case u'8': |
451 | 38.5k | case u'9': |
452 | 38.5k | if (result.fractionHashSigns > 0) { |
453 | 0 | state.toParseException(u"0 cannot follow # after decimal point"); |
454 | 0 | status = U_UNEXPECTED_TOKEN; |
455 | 0 | return; |
456 | 0 | } |
457 | 38.5k | result.widthExceptAffixes += 1; |
458 | 38.5k | result.fractionNumerals += 1; |
459 | 38.5k | result.fractionTotal += 1; |
460 | 38.5k | if (state.peek() == u'0') { |
461 | 38.5k | zeroCounter++; |
462 | 38.5k | } else { |
463 | 0 | result.rounding |
464 | 0 | .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false); |
465 | 0 | zeroCounter = 0; |
466 | 0 | } |
467 | 38.5k | break; |
468 | | |
469 | 222k | default: |
470 | 222k | return; |
471 | 1.08M | } |
472 | 862k | state.next(); // consume the symbol |
473 | 862k | } |
474 | 222k | } |
475 | | |
476 | 254k | void ParsedPatternInfo::consumeExponent(UErrorCode& status) { |
477 | | // Convenience reference: |
478 | 254k | ParsedSubpatternInfo& result = *currentSubpattern; |
479 | | |
480 | 254k | if (state.peek() != u'E') { |
481 | 254k | return; |
482 | 254k | } |
483 | 0 | if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { |
484 | 0 | state.toParseException(u"Cannot have grouping separator in scientific notation"); |
485 | 0 | status = U_MALFORMED_EXPONENTIAL_PATTERN; |
486 | 0 | return; |
487 | 0 | } |
488 | 0 | state.next(); // consume the E |
489 | 0 | result.widthExceptAffixes++; |
490 | 0 | if (state.peek() == u'+') { |
491 | 0 | state.next(); // consume the + |
492 | 0 | result.exponentHasPlusSign = true; |
493 | 0 | result.widthExceptAffixes++; |
494 | 0 | } |
495 | 0 | while (state.peek() == u'0') { |
496 | 0 | state.next(); // consume the 0 |
497 | 0 | result.exponentZeros += 1; |
498 | 0 | result.widthExceptAffixes++; |
499 | 0 | } |
500 | 0 | } |
501 | | |
502 | | /////////////////////////////////////////////////// |
503 | | /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
504 | | /////////////////////////////////////////////////// |
505 | | |
506 | | void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, |
507 | | DecimalFormatProperties& properties, |
508 | 63.9k | IgnoreRounding ignoreRounding, UErrorCode& status) { |
509 | 63.9k | if (pattern.length() == 0) { |
510 | | // Backwards compatibility requires that we reset to the default values. |
511 | | // TODO: Only overwrite the properties that "saveToProperties" normally touches? |
512 | 0 | properties.clear(); |
513 | 0 | return; |
514 | 0 | } |
515 | | |
516 | 63.9k | ParsedPatternInfo patternInfo; |
517 | 63.9k | parseToPatternInfo(pattern, patternInfo, status); |
518 | 63.9k | if (U_FAILURE(status)) { return; } |
519 | 63.9k | patternInfoToProperties(properties, patternInfo, ignoreRounding, status); |
520 | 63.9k | } |
521 | | |
522 | | void |
523 | | PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, |
524 | 63.9k | IgnoreRounding _ignoreRounding, UErrorCode& status) { |
525 | | // Translate from PatternParseResult to Properties. |
526 | | // Note that most data from "negative" is ignored per the specification of DecimalFormat. |
527 | | |
528 | 63.9k | const ParsedSubpatternInfo& positive = patternInfo.positive; |
529 | | |
530 | 63.9k | bool ignoreRounding; |
531 | 63.9k | if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { |
532 | 0 | ignoreRounding = false; |
533 | 63.9k | } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) { |
534 | 63.9k | ignoreRounding = positive.hasCurrencySign; |
535 | 63.9k | } else { |
536 | 0 | U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS); |
537 | 0 | ignoreRounding = true; |
538 | 0 | } |
539 | | |
540 | | // Grouping settings |
541 | 63.9k | auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff); |
542 | 63.9k | auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff); |
543 | 63.9k | auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff); |
544 | 63.9k | if (grouping2 != -1) { |
545 | 58.2k | properties.groupingSize = grouping1; |
546 | 58.2k | properties.groupingUsed = true; |
547 | 58.2k | } else { |
548 | 5.75k | properties.groupingSize = -1; |
549 | 5.75k | properties.groupingUsed = false; |
550 | 5.75k | } |
551 | 63.9k | if (grouping3 != -1) { |
552 | 2.63k | properties.secondaryGroupingSize = grouping2; |
553 | 61.3k | } else { |
554 | 61.3k | properties.secondaryGroupingSize = -1; |
555 | 61.3k | } |
556 | | |
557 | | // For backwards compatibility, require that the pattern emit at least one min digit. |
558 | 63.9k | int minInt, minFrac; |
559 | 63.9k | if (positive.integerTotal == 0 && positive.fractionTotal > 0) { |
560 | | // patterns like ".##" |
561 | 0 | minInt = 0; |
562 | 0 | minFrac = uprv_max(1, positive.fractionNumerals); |
563 | 63.9k | } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) { |
564 | | // patterns like "#.##" |
565 | 0 | minInt = 1; |
566 | 0 | minFrac = 0; |
567 | 63.9k | } else { |
568 | 63.9k | minInt = positive.integerNumerals; |
569 | 63.9k | minFrac = positive.fractionNumerals; |
570 | 63.9k | } |
571 | | |
572 | | // Rounding settings |
573 | | // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage |
574 | 63.9k | if (positive.integerAtSigns > 0) { |
575 | 0 | properties.minimumFractionDigits = -1; |
576 | 0 | properties.maximumFractionDigits = -1; |
577 | 0 | properties.roundingIncrement = 0.0; |
578 | 0 | properties.minimumSignificantDigits = positive.integerAtSigns; |
579 | 0 | properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; |
580 | 63.9k | } else if (!positive.rounding.isZeroish()) { |
581 | 0 | if (!ignoreRounding) { |
582 | 0 | properties.minimumFractionDigits = minFrac; |
583 | 0 | properties.maximumFractionDigits = positive.fractionTotal; |
584 | 0 | properties.roundingIncrement = positive.rounding.toDouble(); |
585 | 0 | } else { |
586 | 0 | properties.minimumFractionDigits = -1; |
587 | 0 | properties.maximumFractionDigits = -1; |
588 | 0 | properties.roundingIncrement = 0.0; |
589 | 0 | } |
590 | 0 | properties.minimumSignificantDigits = -1; |
591 | 0 | properties.maximumSignificantDigits = -1; |
592 | 63.9k | } else { |
593 | 63.9k | if (!ignoreRounding) { |
594 | 63.9k | properties.minimumFractionDigits = minFrac; |
595 | 63.9k | properties.maximumFractionDigits = positive.fractionTotal; |
596 | 63.9k | properties.roundingIncrement = 0.0; |
597 | 63.9k | } else { |
598 | 0 | properties.minimumFractionDigits = -1; |
599 | 0 | properties.maximumFractionDigits = -1; |
600 | 0 | properties.roundingIncrement = 0.0; |
601 | 0 | } |
602 | 63.9k | properties.minimumSignificantDigits = -1; |
603 | 63.9k | properties.maximumSignificantDigits = -1; |
604 | 63.9k | } |
605 | | |
606 | | // If the pattern ends with a '.' then force the decimal point. |
607 | 63.9k | if (positive.hasDecimal && positive.fractionTotal == 0) { |
608 | 0 | properties.decimalSeparatorAlwaysShown = true; |
609 | 63.9k | } else { |
610 | 63.9k | properties.decimalSeparatorAlwaysShown = false; |
611 | 63.9k | } |
612 | | |
613 | | // Persist the currency as decimal separator |
614 | 63.9k | properties.currencyAsDecimal = positive.hasCurrencyDecimal; |
615 | | |
616 | | // Scientific notation settings |
617 | 63.9k | if (positive.exponentZeros > 0) { |
618 | 0 | properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; |
619 | 0 | properties.minimumExponentDigits = positive.exponentZeros; |
620 | 0 | if (positive.integerAtSigns == 0) { |
621 | | // patterns without '@' can define max integer digits, used for engineering notation |
622 | 0 | properties.minimumIntegerDigits = positive.integerNumerals; |
623 | 0 | properties.maximumIntegerDigits = positive.integerTotal; |
624 | 0 | } else { |
625 | | // patterns with '@' cannot define max integer digits |
626 | 0 | properties.minimumIntegerDigits = 1; |
627 | 0 | properties.maximumIntegerDigits = -1; |
628 | 0 | } |
629 | 63.9k | } else { |
630 | 63.9k | properties.exponentSignAlwaysShown = false; |
631 | 63.9k | properties.minimumExponentDigits = -1; |
632 | 63.9k | properties.minimumIntegerDigits = minInt; |
633 | 63.9k | properties.maximumIntegerDigits = -1; |
634 | 63.9k | } |
635 | | |
636 | | // Compute the affix patterns (required for both padding and affixes) |
637 | 63.9k | UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX); |
638 | 63.9k | UnicodeString posSuffix = patternInfo.getString(0); |
639 | | |
640 | | // Padding settings |
641 | 63.9k | if (positive.hasPadding) { |
642 | | // The width of the positive prefix and suffix templates are included in the padding |
643 | 0 | int paddingWidth = positive.widthExceptAffixes + |
644 | 0 | AffixUtils::estimateLength(posPrefix, status) + |
645 | 0 | AffixUtils::estimateLength(posSuffix, status); |
646 | 0 | properties.formatWidth = paddingWidth; |
647 | 0 | UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); |
648 | 0 | if (rawPaddingString.length() == 1) { |
649 | 0 | properties.padString = rawPaddingString; |
650 | 0 | } else if (rawPaddingString.length() == 2) { |
651 | 0 | if (rawPaddingString.charAt(0) == u'\'') { |
652 | 0 | properties.padString.setTo(u"'", -1); |
653 | 0 | } else { |
654 | 0 | properties.padString = rawPaddingString; |
655 | 0 | } |
656 | 0 | } else { |
657 | 0 | properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); |
658 | 0 | } |
659 | 0 | properties.padPosition = positive.paddingLocation; |
660 | 63.9k | } else { |
661 | 63.9k | properties.formatWidth = -1; |
662 | 63.9k | properties.padString.setToBogus(); |
663 | 63.9k | properties.padPosition.nullify(); |
664 | 63.9k | } |
665 | | |
666 | | // Set the affixes |
667 | | // Always call the setter, even if the prefixes are empty, especially in the case of the |
668 | | // negative prefix pattern, to prevent default values from overriding the pattern. |
669 | 63.9k | properties.positivePrefixPattern = posPrefix; |
670 | 63.9k | properties.positiveSuffixPattern = posSuffix; |
671 | 63.9k | if (patternInfo.fHasNegativeSubpattern) { |
672 | 0 | properties.negativePrefixPattern = patternInfo.getString( |
673 | 0 | AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); |
674 | 0 | properties.negativeSuffixPattern = patternInfo.getString( |
675 | 0 | AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); |
676 | 63.9k | } else { |
677 | 63.9k | properties.negativePrefixPattern.setToBogus(); |
678 | 63.9k | properties.negativeSuffixPattern.setToBogus(); |
679 | 63.9k | } |
680 | | |
681 | | // Set the magnitude multiplier |
682 | 63.9k | if (positive.hasPercentSign) { |
683 | 0 | properties.magnitudeMultiplier = 2; |
684 | 63.9k | } else if (positive.hasPerMilleSign) { |
685 | 0 | properties.magnitudeMultiplier = 3; |
686 | 63.9k | } else { |
687 | 63.9k | properties.magnitudeMultiplier = 0; |
688 | 63.9k | } |
689 | 63.9k | } |
690 | | |
691 | | /////////////////////////////////////////////////////////////////// |
692 | | /// End PatternStringParser.java; begin PatternStringUtils.java /// |
693 | | /////////////////////////////////////////////////////////////////// |
694 | | |
695 | | // Determine whether a given roundingIncrement should be ignored for formatting |
696 | | // based on the current maxFrac value (maximum fraction digits). For example a |
697 | | // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac |
698 | | // is 2 or more. Note that roundingIncrements are rounded in significance, so |
699 | | // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e. |
700 | | // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of |
701 | | // 0.005 is treated like 0.001 for significance). This is the reason for the |
702 | | // initial doubling below. |
703 | | // roundIncr must be non-zero. |
704 | 0 | bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) { |
705 | 0 | if (maxFrac < 0) { |
706 | 0 | return false; |
707 | 0 | } |
708 | 0 | int32_t frac = 0; |
709 | 0 | roundIncr *= 2.0; |
710 | 0 | for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0); |
711 | 0 | return (frac > maxFrac); |
712 | 0 | } |
713 | | |
714 | | UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, |
715 | 0 | UErrorCode& status) { |
716 | 0 | UnicodeString sb; |
717 | | |
718 | | // Convenience references |
719 | | // The uprv_min() calls prevent DoS |
720 | 0 | int32_t dosMax = 100; |
721 | 0 | int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); |
722 | 0 | int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); |
723 | 0 | bool useGrouping = properties.groupingUsed; |
724 | 0 | int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); |
725 | 0 | NullableValue<PadPosition> paddingLocation = properties.padPosition; |
726 | 0 | UnicodeString paddingString = properties.padString; |
727 | 0 | int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); |
728 | 0 | int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); |
729 | 0 | int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); |
730 | 0 | int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); |
731 | 0 | int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); |
732 | 0 | int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); |
733 | 0 | bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; |
734 | 0 | int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); |
735 | 0 | bool exponentShowPlusSign = properties.exponentSignAlwaysShown; |
736 | |
|
737 | 0 | AutoAffixPatternProvider affixProvider(properties, status); |
738 | | |
739 | | // Prefixes |
740 | 0 | sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX)); |
741 | 0 | int32_t afterPrefixPos = sb.length(); |
742 | | |
743 | | // Figure out the grouping sizes. |
744 | 0 | if (!useGrouping) { |
745 | 0 | grouping1 = 0; |
746 | 0 | grouping2 = 0; |
747 | 0 | } else if (grouping1 == grouping2) { |
748 | 0 | grouping1 = 0; |
749 | 0 | } |
750 | 0 | int32_t groupingLength = grouping1 + grouping2 + 1; |
751 | | |
752 | | // Figure out the digits we need to put in the pattern. |
753 | 0 | double increment = properties.roundingIncrement; |
754 | 0 | UnicodeString digitsString; |
755 | 0 | int32_t digitsStringScale = 0; |
756 | 0 | if (maxSig != uprv_min(dosMax, -1)) { |
757 | | // Significant Digits. |
758 | 0 | while (digitsString.length() < minSig) { |
759 | 0 | digitsString.append(u'@'); |
760 | 0 | } |
761 | 0 | while (digitsString.length() < maxSig) { |
762 | 0 | digitsString.append(u'#'); |
763 | 0 | } |
764 | 0 | } else if (increment != 0.0 && !ignoreRoundingIncrement(increment,maxFrac)) { |
765 | | // Rounding Increment. |
766 | 0 | DecimalQuantity incrementQuantity; |
767 | 0 | incrementQuantity.setToDouble(increment); |
768 | 0 | incrementQuantity.roundToInfinity(); |
769 | 0 | digitsStringScale = incrementQuantity.getLowerDisplayMagnitude(); |
770 | 0 | incrementQuantity.adjustMagnitude(-digitsStringScale); |
771 | 0 | incrementQuantity.setMinInteger(minInt - digitsStringScale); |
772 | 0 | UnicodeString str = incrementQuantity.toPlainString(); |
773 | 0 | if (str.charAt(0) == u'-') { |
774 | | // TODO: Unsupported operation exception or fail silently? |
775 | 0 | digitsString.append(str, 1, str.length() - 1); |
776 | 0 | } else { |
777 | 0 | digitsString.append(str); |
778 | 0 | } |
779 | 0 | } |
780 | 0 | while (digitsString.length() + digitsStringScale < minInt) { |
781 | 0 | digitsString.insert(0, u'0'); |
782 | 0 | } |
783 | 0 | while (-digitsStringScale < minFrac) { |
784 | 0 | digitsString.append(u'0'); |
785 | 0 | digitsStringScale--; |
786 | 0 | } |
787 | | |
788 | | // Write the digits to the string builder |
789 | 0 | int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); |
790 | 0 | m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; |
791 | 0 | int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; |
792 | 0 | for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { |
793 | 0 | int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; |
794 | 0 | if (di < 0 || di >= digitsString.length()) { |
795 | 0 | sb.append(u'#'); |
796 | 0 | } else { |
797 | 0 | sb.append(digitsString.charAt(di)); |
798 | 0 | } |
799 | | // Decimal separator |
800 | 0 | if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { |
801 | 0 | if (properties.currencyAsDecimal) { |
802 | 0 | sb.append(u'¤'); |
803 | 0 | } else { |
804 | 0 | sb.append(u'.'); |
805 | 0 | } |
806 | 0 | } |
807 | 0 | if (!useGrouping) { |
808 | 0 | continue; |
809 | 0 | } |
810 | | // Least-significant grouping separator |
811 | 0 | if (magnitude > 0 && magnitude == grouping1) { |
812 | 0 | sb.append(u','); |
813 | 0 | } |
814 | | // All other grouping separators |
815 | 0 | if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { |
816 | 0 | sb.append(u','); |
817 | 0 | } |
818 | 0 | } |
819 | | |
820 | | // Exponential notation |
821 | 0 | if (exponentDigits != uprv_min(dosMax, -1)) { |
822 | 0 | sb.append(u'E'); |
823 | 0 | if (exponentShowPlusSign) { |
824 | 0 | sb.append(u'+'); |
825 | 0 | } |
826 | 0 | for (int32_t i = 0; i < exponentDigits; i++) { |
827 | 0 | sb.append(u'0'); |
828 | 0 | } |
829 | 0 | } |
830 | | |
831 | | // Suffixes |
832 | 0 | int32_t beforeSuffixPos = sb.length(); |
833 | 0 | sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); |
834 | | |
835 | | // Resolve Padding |
836 | 0 | if (paddingWidth > 0 && !paddingLocation.isNull()) { |
837 | 0 | while (paddingWidth - sb.length() > 0) { |
838 | 0 | sb.insert(afterPrefixPos, u'#'); |
839 | 0 | beforeSuffixPos++; |
840 | 0 | } |
841 | 0 | int32_t addedLength; |
842 | 0 | switch (paddingLocation.get(status)) { |
843 | 0 | case PadPosition::UNUM_PAD_BEFORE_PREFIX: |
844 | 0 | addedLength = escapePaddingString(paddingString, sb, 0, status); |
845 | 0 | sb.insert(0, u'*'); |
846 | 0 | afterPrefixPos += addedLength + 1; |
847 | 0 | beforeSuffixPos += addedLength + 1; |
848 | 0 | break; |
849 | 0 | case PadPosition::UNUM_PAD_AFTER_PREFIX: |
850 | 0 | addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); |
851 | 0 | sb.insert(afterPrefixPos, u'*'); |
852 | 0 | afterPrefixPos += addedLength + 1; |
853 | 0 | beforeSuffixPos += addedLength + 1; |
854 | 0 | break; |
855 | 0 | case PadPosition::UNUM_PAD_BEFORE_SUFFIX: |
856 | 0 | escapePaddingString(paddingString, sb, beforeSuffixPos, status); |
857 | 0 | sb.insert(beforeSuffixPos, u'*'); |
858 | 0 | break; |
859 | 0 | case PadPosition::UNUM_PAD_AFTER_SUFFIX: |
860 | 0 | sb.append(u'*'); |
861 | 0 | escapePaddingString(paddingString, sb, sb.length(), status); |
862 | 0 | break; |
863 | 0 | } |
864 | 0 | if (U_FAILURE(status)) { return sb; } |
865 | 0 | } |
866 | | |
867 | | // Negative affixes |
868 | | // Ignore if the negative prefix pattern is "-" and the negative suffix is empty |
869 | 0 | if (affixProvider.get().hasNegativeSubpattern()) { |
870 | 0 | sb.append(u';'); |
871 | 0 | sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); |
872 | | // Copy the positive digit format into the negative. |
873 | | // This is optional; the pattern is the same as if '#' were appended here instead. |
874 | | // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. |
875 | | // See https://unicode-org.atlassian.net/browse/ICU-13707 |
876 | 0 | UnicodeString copy(sb); |
877 | 0 | sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); |
878 | 0 | sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); |
879 | 0 | } |
880 | |
|
881 | 0 | return sb; |
882 | 0 | } |
883 | | |
884 | | int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, |
885 | 0 | UErrorCode& status) { |
886 | 0 | (void) status; |
887 | 0 | if (input.length() == 0) { |
888 | 0 | input.setTo(kFallbackPaddingString, -1); |
889 | 0 | } |
890 | 0 | int startLength = output.length(); |
891 | 0 | if (input.length() == 1) { |
892 | 0 | if (input.compare(u"'", -1) == 0) { |
893 | 0 | output.insert(startIndex, u"''", -1); |
894 | 0 | } else { |
895 | 0 | output.insert(startIndex, input); |
896 | 0 | } |
897 | 0 | } else { |
898 | 0 | output.insert(startIndex, u'\''); |
899 | 0 | int offset = 1; |
900 | 0 | for (int i = 0; i < input.length(); i++) { |
901 | | // it's okay to deal in chars here because the quote mark is the only interesting thing. |
902 | 0 | char16_t ch = input.charAt(i); |
903 | 0 | if (ch == u'\'') { |
904 | 0 | output.insert(startIndex + offset, u"''", -1); |
905 | 0 | offset += 2; |
906 | 0 | } else { |
907 | 0 | output.insert(startIndex + offset, ch); |
908 | 0 | offset += 1; |
909 | 0 | } |
910 | 0 | } |
911 | 0 | output.insert(startIndex + offset, u'\''); |
912 | 0 | } |
913 | 0 | return output.length() - startLength; |
914 | 0 | } |
915 | | |
916 | | UnicodeString |
917 | | PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, |
918 | 0 | bool toLocalized, UErrorCode& status) { |
919 | | // Construct a table of strings to be converted between localized and standard. |
920 | 0 | static constexpr int32_t LEN = 21; |
921 | 0 | UnicodeString table[LEN][2]; |
922 | 0 | int standIdx = toLocalized ? 0 : 1; |
923 | 0 | int localIdx = toLocalized ? 1 : 0; |
924 | | // TODO: Add approximately sign here? |
925 | 0 | table[0][standIdx] = u"%"; |
926 | 0 | table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); |
927 | 0 | table[1][standIdx] = u"‰"; |
928 | 0 | table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); |
929 | 0 | table[2][standIdx] = u"."; |
930 | 0 | table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); |
931 | 0 | table[3][standIdx] = u","; |
932 | 0 | table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); |
933 | 0 | table[4][standIdx] = u"-"; |
934 | 0 | table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
935 | 0 | table[5][standIdx] = u"+"; |
936 | 0 | table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
937 | 0 | table[6][standIdx] = u";"; |
938 | 0 | table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); |
939 | 0 | table[7][standIdx] = u"@"; |
940 | 0 | table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); |
941 | 0 | table[8][standIdx] = u"E"; |
942 | 0 | table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); |
943 | 0 | table[9][standIdx] = u"*"; |
944 | 0 | table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); |
945 | 0 | table[10][standIdx] = u"#"; |
946 | 0 | table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); |
947 | 0 | for (int i = 0; i < 10; i++) { |
948 | 0 | table[11 + i][standIdx] = u'0' + i; |
949 | 0 | table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); |
950 | 0 | } |
951 | | |
952 | | // Special case: quotes are NOT allowed to be in any localIdx strings. |
953 | | // Substitute them with '’' instead. |
954 | 0 | for (int32_t i = 0; i < LEN; i++) { |
955 | 0 | table[i][localIdx].findAndReplace(u'\'', u'’'); |
956 | 0 | } |
957 | | |
958 | | // Iterate through the string and convert. |
959 | | // State table: |
960 | | // 0 => base state |
961 | | // 1 => first char inside a quoted sequence in input and output string |
962 | | // 2 => inside a quoted sequence in input and output string |
963 | | // 3 => first char after a close quote in input string; |
964 | | // close quote still needs to be written to output string |
965 | | // 4 => base state in input string; inside quoted sequence in output string |
966 | | // 5 => first char inside a quoted sequence in input string; |
967 | | // inside quoted sequence in output string |
968 | 0 | UnicodeString result; |
969 | 0 | int state = 0; |
970 | 0 | for (int offset = 0; offset < input.length(); offset++) { |
971 | 0 | char16_t ch = input.charAt(offset); |
972 | | |
973 | | // Handle a quote character (state shift) |
974 | 0 | if (ch == u'\'') { |
975 | 0 | if (state == 0) { |
976 | 0 | result.append(u'\''); |
977 | 0 | state = 1; |
978 | 0 | continue; |
979 | 0 | } else if (state == 1) { |
980 | 0 | result.append(u'\''); |
981 | 0 | state = 0; |
982 | 0 | continue; |
983 | 0 | } else if (state == 2) { |
984 | 0 | state = 3; |
985 | 0 | continue; |
986 | 0 | } else if (state == 3) { |
987 | 0 | result.append(u'\''); |
988 | 0 | result.append(u'\''); |
989 | 0 | state = 1; |
990 | 0 | continue; |
991 | 0 | } else if (state == 4) { |
992 | 0 | state = 5; |
993 | 0 | continue; |
994 | 0 | } else { |
995 | 0 | U_ASSERT(state == 5); |
996 | 0 | result.append(u'\''); |
997 | 0 | result.append(u'\''); |
998 | 0 | state = 4; |
999 | 0 | continue; |
1000 | 0 | } |
1001 | 0 | } |
1002 | | |
1003 | 0 | if (state == 0 || state == 3 || state == 4) { |
1004 | 0 | for (auto& pair : table) { |
1005 | | // Perform a greedy match on this symbol string |
1006 | 0 | UnicodeString temp = input.tempSubString(offset, pair[0].length()); |
1007 | 0 | if (temp == pair[0]) { |
1008 | | // Skip ahead past this region for the next iteration |
1009 | 0 | offset += pair[0].length() - 1; |
1010 | 0 | if (state == 3 || state == 4) { |
1011 | 0 | result.append(u'\''); |
1012 | 0 | state = 0; |
1013 | 0 | } |
1014 | 0 | result.append(pair[1]); |
1015 | 0 | goto continue_outer; |
1016 | 0 | } |
1017 | 0 | } |
1018 | | // No replacement found. Check if a special quote is necessary |
1019 | 0 | for (auto& pair : table) { |
1020 | 0 | UnicodeString temp = input.tempSubString(offset, pair[1].length()); |
1021 | 0 | if (temp == pair[1]) { |
1022 | 0 | if (state == 0) { |
1023 | 0 | result.append(u'\''); |
1024 | 0 | state = 4; |
1025 | 0 | } |
1026 | 0 | result.append(ch); |
1027 | 0 | goto continue_outer; |
1028 | 0 | } |
1029 | 0 | } |
1030 | | // Still nothing. Copy the char verbatim. (Add a close quote if necessary) |
1031 | 0 | if (state == 3 || state == 4) { |
1032 | 0 | result.append(u'\''); |
1033 | 0 | state = 0; |
1034 | 0 | } |
1035 | 0 | result.append(ch); |
1036 | 0 | } else { |
1037 | 0 | U_ASSERT(state == 1 || state == 2 || state == 5); |
1038 | 0 | result.append(ch); |
1039 | 0 | state = 2; |
1040 | 0 | } |
1041 | 0 | continue_outer:; |
1042 | 0 | } |
1043 | | // Resolve final quotes |
1044 | 0 | if (state == 3 || state == 4) { |
1045 | 0 | result.append(u'\''); |
1046 | 0 | state = 0; |
1047 | 0 | } |
1048 | 0 | if (state != 0) { |
1049 | | // Malformed localized pattern: unterminated quote |
1050 | 0 | status = U_PATTERN_SYNTAX_ERROR; |
1051 | 0 | } |
1052 | 0 | return result; |
1053 | 0 | } |
1054 | | |
1055 | | void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, |
1056 | | PatternSignType patternSignType, |
1057 | | bool approximately, |
1058 | | StandardPlural::Form plural, |
1059 | | bool perMilleReplacesPercent, |
1060 | | bool dropCurrencySymbols, |
1061 | 1.46M | UnicodeString& output) { |
1062 | | |
1063 | | // Should the output render '+' where '-' would normally appear in the pattern? |
1064 | 1.46M | bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) |
1065 | 1.46M | && !patternInfo.positiveHasPlusSign(); |
1066 | | |
1067 | | // Should we use the affix from the negative subpattern? |
1068 | | // (If not, we will use the positive subpattern.) |
1069 | 1.46M | bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() |
1070 | 1.46M | && (patternSignType == PATTERN_SIGN_TYPE_NEG |
1071 | 62 | || (patternInfo.negativeHasMinusSign() && (plusReplacesMinusSign || approximately))); |
1072 | | |
1073 | | // Resolve the flags for the affix pattern. |
1074 | 1.46M | int flags = 0; |
1075 | 1.46M | if (useNegativeAffixPattern) { |
1076 | 40 | flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; |
1077 | 40 | } |
1078 | 1.46M | if (isPrefix) { |
1079 | 732k | flags |= AffixPatternProvider::AFFIX_PREFIX; |
1080 | 732k | } |
1081 | 1.46M | if (plural != StandardPlural::Form::COUNT) { |
1082 | 10.5k | U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); |
1083 | 10.5k | flags |= plural; |
1084 | 10.5k | } |
1085 | | |
1086 | | // Should we prepend a sign to the pattern? |
1087 | 1.46M | bool prependSign; |
1088 | 1.46M | if (!isPrefix || useNegativeAffixPattern) { |
1089 | 732k | prependSign = false; |
1090 | 732k | } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { |
1091 | 185k | prependSign = true; |
1092 | 547k | } else { |
1093 | 547k | prependSign = plusReplacesMinusSign || approximately; |
1094 | 547k | } |
1095 | | |
1096 | | // What symbols should take the place of the sign placeholder? |
1097 | 1.46M | const char16_t* signSymbols = u"-"; |
1098 | 1.46M | if (approximately) { |
1099 | 0 | if (plusReplacesMinusSign) { |
1100 | 0 | signSymbols = u"~+"; |
1101 | 0 | } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { |
1102 | 0 | signSymbols = u"~-"; |
1103 | 0 | } else { |
1104 | 0 | signSymbols = u"~"; |
1105 | 0 | } |
1106 | 1.46M | } else if (plusReplacesMinusSign) { |
1107 | 358k | signSymbols = u"+"; |
1108 | 358k | } |
1109 | | |
1110 | | // Compute the number of tokens in the affix pattern (signSymbols is considered one token). |
1111 | 1.46M | int length = patternInfo.length(flags) + (prependSign ? 1 : 0); |
1112 | | |
1113 | | // Finally, set the result into the StringBuilder. |
1114 | 1.46M | output.remove(); |
1115 | 1.83M | for (int index = 0; index < length; index++) { |
1116 | 371k | char16_t candidate; |
1117 | 371k | if (prependSign && index == 0) { |
1118 | 364k | candidate = u'-'; |
1119 | 364k | } else if (prependSign) { |
1120 | 98 | candidate = patternInfo.charAt(flags, index - 1); |
1121 | 7.37k | } else { |
1122 | 7.37k | candidate = patternInfo.charAt(flags, index); |
1123 | 7.37k | } |
1124 | 371k | if (candidate == u'-') { |
1125 | 364k | if (u_strlen(signSymbols) == 1) { |
1126 | 364k | candidate = signSymbols[0]; |
1127 | 364k | } else { |
1128 | 0 | output.append(signSymbols[0]); |
1129 | 0 | candidate = signSymbols[1]; |
1130 | 0 | } |
1131 | 364k | } |
1132 | 371k | if (perMilleReplacesPercent && candidate == u'%') { |
1133 | 0 | candidate = u'‰'; |
1134 | 0 | } |
1135 | 371k | if (dropCurrencySymbols && candidate == u'\u00A4') { |
1136 | 0 | continue; |
1137 | 0 | } |
1138 | 371k | output.append(candidate); |
1139 | 371k | } |
1140 | 1.46M | } |
1141 | | |
1142 | 1.45M | PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { |
1143 | 1.45M | switch (signDisplay) { |
1144 | 21.1k | case UNUM_SIGN_AUTO: |
1145 | 21.1k | case UNUM_SIGN_ACCOUNTING: |
1146 | 21.1k | switch (signum) { |
1147 | 5.26k | case SIGNUM_NEG: |
1148 | 6.67k | case SIGNUM_NEG_ZERO: |
1149 | 6.67k | return PATTERN_SIGN_TYPE_NEG; |
1150 | 3.87k | case SIGNUM_POS_ZERO: |
1151 | 14.5k | case SIGNUM_POS: |
1152 | 14.5k | return PATTERN_SIGN_TYPE_POS; |
1153 | 0 | default: |
1154 | 0 | break; |
1155 | 21.1k | } |
1156 | 0 | break; |
1157 | | |
1158 | 28 | case UNUM_SIGN_ALWAYS: |
1159 | 32 | case UNUM_SIGN_ACCOUNTING_ALWAYS: |
1160 | 32 | switch (signum) { |
1161 | 12 | case SIGNUM_NEG: |
1162 | 12 | case SIGNUM_NEG_ZERO: |
1163 | 12 | return PATTERN_SIGN_TYPE_NEG; |
1164 | 6 | case SIGNUM_POS_ZERO: |
1165 | 20 | case SIGNUM_POS: |
1166 | 20 | return PATTERN_SIGN_TYPE_POS_SIGN; |
1167 | 0 | default: |
1168 | 0 | break; |
1169 | 32 | } |
1170 | 0 | break; |
1171 | | |
1172 | 1.43M | case UNUM_SIGN_EXCEPT_ZERO: |
1173 | 1.43M | case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
1174 | 1.43M | switch (signum) { |
1175 | 358k | case SIGNUM_NEG: |
1176 | 358k | return PATTERN_SIGN_TYPE_NEG; |
1177 | 358k | case SIGNUM_NEG_ZERO: |
1178 | 716k | case SIGNUM_POS_ZERO: |
1179 | 716k | return PATTERN_SIGN_TYPE_POS; |
1180 | 358k | case SIGNUM_POS: |
1181 | 358k | return PATTERN_SIGN_TYPE_POS_SIGN; |
1182 | 0 | default: |
1183 | 0 | break; |
1184 | 1.43M | } |
1185 | 0 | break; |
1186 | | |
1187 | 8 | case UNUM_SIGN_NEGATIVE: |
1188 | 12 | case UNUM_SIGN_ACCOUNTING_NEGATIVE: |
1189 | 12 | switch (signum) { |
1190 | 2 | case SIGNUM_NEG: |
1191 | 2 | return PATTERN_SIGN_TYPE_NEG; |
1192 | 0 | case SIGNUM_NEG_ZERO: |
1193 | 4 | case SIGNUM_POS_ZERO: |
1194 | 10 | case SIGNUM_POS: |
1195 | 10 | return PATTERN_SIGN_TYPE_POS; |
1196 | 0 | default: |
1197 | 0 | break; |
1198 | 12 | } |
1199 | 0 | break; |
1200 | | |
1201 | 4 | case UNUM_SIGN_NEVER: |
1202 | 4 | return PATTERN_SIGN_TYPE_POS; |
1203 | | |
1204 | 0 | default: |
1205 | 0 | break; |
1206 | 1.45M | } |
1207 | | |
1208 | 1.45M | UPRV_UNREACHABLE_EXIT; |
1209 | 0 | return PATTERN_SIGN_TYPE_POS; |
1210 | 1.45M | } |
1211 | | |
1212 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |