Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/numparse_decimal.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
// Allow implicit conversion from char16_t* to UnicodeString for this file:
9
// Helpful in toString methods and elsewhere.
10
#define UNISTR_FROM_STRING_EXPLICIT
11
12
#include "numparse_types.h"
13
#include "numparse_decimal.h"
14
#include "static_unicode_sets.h"
15
#include "numparse_utils.h"
16
#include "unicode/uchar.h"
17
#include "putilimp.h"
18
#include "number_decimalquantity.h"
19
20
using namespace icu;
21
using namespace icu::numparse;
22
using namespace icu::numparse::impl;
23
24
25
// Builds a matcher for the digits, grouping separators and decimal separator of a number.
//
// symbols:    supplies the separator strings, the zero code point and the per-digit strings.
// grouper:    supplies the primary and secondary grouping sizes.
// parseFlags: bit set; this constructor reads PARSE_FLAG_MONETARY_SEPARATORS,
//             PARSE_FLAG_STRICT_SEPARATORS, PARSE_FLAG_STRICT_GROUPING_SIZE,
//             PARSE_FLAG_GROUPING_DISABLED and PARSE_FLAG_INTEGER_ONLY.
DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
                               parse_flags_t parseFlags) {
    // Pick the literal separator strings: monetary or regular.
    if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
    } else {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    }
    bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
    unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
                                                : unisets::ALL_SEPARATORS;

    // Attempt to find separators in the static cache

    groupingUniSet = unisets::get(groupingKey);
    unisets::Key decimalKey = unisets::chooseFrom(
            decimalSeparator,
            strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
            strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
    if (decimalKey >= 0) {
        decimalUniSet = unisets::get(decimalKey);
    } else if (!decimalSeparator.isEmpty()) {
        // No cached set fits: build a one-element set from the first code point of the
        // decimal separator and hand it to fLocalDecimalUniSet.
        auto* set = new UnicodeSet();
        set->add(decimalSeparator.char32At(0));
        set->freeze();
        decimalUniSet = set;
        fLocalDecimalUniSet.adoptInstead(set);
    } else {
        decimalUniSet = unisets::get(unisets::EMPTY);
    }

    if (groupingKey >= 0 && decimalKey >= 0) {
        // Everything is available in the static cache
        separatorSet = groupingUniSet;
        // The lead set must use the same strictness as groupingKey above, so that the
        // leadSet fast path in smokeTest() accepts the same leading code points as its
        // separatorSet-or-digit slow path. (The two arms used to be swapped.)
        leadSet = unisets::get(
                strictSeparators ? unisets::DIGITS_OR_STRICT_ALL_SEPARATORS
                                 : unisets::DIGITS_OR_ALL_SEPARATORS);
    } else {
        // Custom decimal separator: combine both separator sets into a local set and
        // leave leadSet unset so smokeTest() takes its slow path.
        auto* set = new UnicodeSet();
        set->addAll(*groupingUniSet);
        set->addAll(*decimalUniSet);
        set->freeze();
        separatorSet = set;
        fLocalSeparatorSet.adoptInstead(set);
        leadSet = nullptr;
    }

    // Keep explicit digit strings only when the locale's zero is missing (-1), is not a
    // digit, or does not have digit value 0; otherwise match() relies on u_isdigit/u_digit.
    UChar32 cpZero = symbols.getCodePointZero();
    if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
        // Uncommon case: okay to allocate.
        auto digitStrings = new UnicodeString[10];
        fLocalDigitStrings.adoptInstead(digitStrings);
        for (int32_t i = 0; i <= 9; i++) {
            digitStrings[i] = symbols.getConstDigitSymbol(i);
        }
    }

    requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
    groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
    integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
    grouping1 = grouper.getPrimary();
    grouping2 = grouper.getSecondary();

    // Fraction grouping parsing is disabled for now but could be enabled later.
    // See http://bugs.icu-project.org/trac/ticket/10794
    // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
}
93
94
0
// Convenience overload: matches a number that is not an exponent by forwarding to the
// exponent-aware overload with an exponent sign of 0.
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int8_t noExponentSign = 0;
    return match(segment, result, noExponentSign, status);
}
97
98
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
99
0
                           UErrorCode&) const {
100
0
    if (result.seenNumber() && exponentSign == 0) {
101
0
        // A number has already been consumed.
102
0
        return false;
103
0
    } else if (exponentSign != 0) {
104
0
        // scientific notation always comes after the number
105
0
        U_ASSERT(!result.quantity.bogus);
106
0
    }
107
0
108
0
    // Initial offset before any character consumption.
109
0
    int32_t initialOffset = segment.getOffset();
110
0
111
0
    // Return value: whether to ask for more characters.
112
0
    bool maybeMore = false;
113
0
114
0
    // All digits consumed so far.
115
0
    number::impl::DecimalQuantity digitsConsumed;
116
0
    digitsConsumed.bogus = true;
117
0
118
0
    // The total number of digits after the decimal place, used for scaling the result.
119
0
    int32_t digitsAfterDecimalPlace = 0;
120
0
121
0
    // The actual grouping and decimal separators used in the string.
122
0
    // If non-null, we have seen that token.
123
0
    UnicodeString actualGroupingString;
124
0
    UnicodeString actualDecimalString;
125
0
    actualGroupingString.setToBogus();
126
0
    actualDecimalString.setToBogus();
127
0
128
0
    // Information for two groups: the previous group and the current group.
129
0
    //
130
0
    // Each group has three pieces of information:
131
0
    //
132
0
    // Offset: the string position of the beginning of the group, including a leading separator
133
0
    // if there was a leading separator. This is needed in case we need to rewind the parse to
134
0
    // that position.
135
0
    //
136
0
    // Separator type:
137
0
    // 0 => beginning of string
138
0
    // 1 => lead separator is a grouping separator
139
0
    // 2 => lead separator is a decimal separator
140
0
    //
141
0
    // Count: the number of digits in the group. If -1, the group has been validated.
142
0
    int32_t currGroupOffset = 0;
143
0
    int32_t currGroupSepType = 0;
144
0
    int32_t currGroupCount = 0;
145
0
    int32_t prevGroupOffset = -1;
146
0
    int32_t prevGroupSepType = -1;
147
0
    int32_t prevGroupCount = -1;
148
0
149
0
    while (segment.length() > 0) {
150
0
        maybeMore = false;
151
0
152
0
        // Attempt to match a digit.
153
0
        int8_t digit = -1;
154
0
155
0
        // Try by code point digit value.
156
0
        UChar32 cp = segment.getCodePoint();
157
0
        if (u_isdigit(cp)) {
158
0
            segment.adjustOffset(U16_LENGTH(cp));
159
0
            digit = static_cast<int8_t>(u_digit(cp, 10));
160
0
        }
161
0
162
0
        // Try by digit string.
163
0
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
164
0
            for (int32_t i = 0; i < 10; i++) {
165
0
                const UnicodeString& str = fLocalDigitStrings[i];
166
0
                if (str.isEmpty()) {
167
0
                    continue;
168
0
                }
169
0
                int32_t overlap = segment.getCommonPrefixLength(str);
170
0
                if (overlap == str.length()) {
171
0
                    segment.adjustOffset(overlap);
172
0
                    digit = static_cast<int8_t>(i);
173
0
                    break;
174
0
                }
175
0
                maybeMore = maybeMore || (overlap == segment.length());
176
0
            }
177
0
        }
178
0
179
0
        if (digit >= 0) {
180
0
            // Digit was found.
181
0
            if (digitsConsumed.bogus) {
182
0
                digitsConsumed.bogus = false;
183
0
                digitsConsumed.clear();
184
0
            }
185
0
            digitsConsumed.appendDigit(digit, 0, true);
186
0
            currGroupCount++;
187
0
            if (!actualDecimalString.isBogus()) {
188
0
                digitsAfterDecimalPlace++;
189
0
            }
190
0
            continue;
191
0
        }
192
0
193
0
        // Attempt to match a literal grouping or decimal separator.
194
0
        bool isDecimal = false;
195
0
        bool isGrouping = false;
196
0
197
0
        // 1) Attempt the decimal separator string literal.
198
0
        // if (we have not seen a decimal separator yet) { ... }
199
0
        if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
200
0
            int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
201
0
            maybeMore = maybeMore || (overlap == segment.length());
202
0
            if (overlap == decimalSeparator.length()) {
203
0
                isDecimal = true;
204
0
                actualDecimalString = decimalSeparator;
205
0
            }
206
0
        }
207
0
208
0
        // 2) Attempt to match the actual grouping string literal.
209
0
        if (!actualGroupingString.isBogus()) {
210
0
            int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
211
0
            maybeMore = maybeMore || (overlap == segment.length());
212
0
            if (overlap == actualGroupingString.length()) {
213
0
                isGrouping = true;
214
0
            }
215
0
        }
216
0
217
0
        // 2.5) Attempt to match a new the grouping separator string literal.
218
0
        // if (we have not seen a grouping or decimal separator yet) { ... }
219
0
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
220
0
            !groupingSeparator.isEmpty()) {
221
0
            int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
222
0
            maybeMore = maybeMore || (overlap == segment.length());
223
0
            if (overlap == groupingSeparator.length()) {
224
0
                isGrouping = true;
225
0
                actualGroupingString = groupingSeparator;
226
0
            }
227
0
        }
228
0
229
0
        // 3) Attempt to match a decimal separator from the equivalence set.
230
0
        // if (we have not seen a decimal separator yet) { ... }
231
0
        // The !isGrouping is to confirm that we haven't yet matched the current character.
232
0
        if (!isGrouping && actualDecimalString.isBogus()) {
233
0
            if (decimalUniSet->contains(cp)) {
234
0
                isDecimal = true;
235
0
                actualDecimalString = UnicodeString(cp);
236
0
            }
237
0
        }
238
0
239
0
        // 4) Attempt to match a grouping separator from the equivalence set.
240
0
        // if (we have not seen a grouping or decimal separator yet) { ... }
241
0
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
242
0
            if (groupingUniSet->contains(cp)) {
243
0
                isGrouping = true;
244
0
                actualGroupingString = UnicodeString(cp);
245
0
            }
246
0
        }
247
0
248
0
        // Leave if we failed to match this as a separator.
249
0
        if (!isDecimal && !isGrouping) {
250
0
            break;
251
0
        }
252
0
253
0
        // Check for conditions when we don't want to accept the separator.
254
0
        if (isDecimal && integerOnly) {
255
0
            break;
256
0
        } else if (currGroupSepType == 2 && isGrouping) {
257
0
            // Fraction grouping
258
0
            break;
259
0
        }
260
0
261
0
        // Validate intermediate grouping sizes.
262
0
        bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
263
0
        bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
264
0
        if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
265
0
            // Invalid grouping sizes.
266
0
            if (isGrouping && currGroupCount == 0) {
267
0
                // Trailing grouping separators: these are taken care of below
268
0
                U_ASSERT(currGroupSepType == 1);
269
0
            } else if (requireGroupingMatch) {
270
0
                // Strict mode: reject the parse
271
0
                digitsConsumed.clear();
272
0
                digitsConsumed.bogus = true;
273
0
            }
274
0
            break;
275
0
        } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
276
0
            break;
277
0
        } else {
278
0
            // Grouping sizes OK so far.
279
0
            prevGroupOffset = currGroupOffset;
280
0
            prevGroupCount = currGroupCount;
281
0
            if (isDecimal) {
282
0
                // Do not validate this group any more.
283
0
                prevGroupSepType = -1;
284
0
            } else {
285
0
                prevGroupSepType = currGroupSepType;
286
0
            }
287
0
        }
288
0
289
0
        // OK to accept the separator.
290
0
        // Special case: don't update currGroup if it is empty; this allows two grouping
291
0
        // separators in a row in lenient mode.
292
0
        if (currGroupCount != 0) {
293
0
            currGroupOffset = segment.getOffset();
294
0
        }
295
0
        currGroupSepType = isGrouping ? 1 : 2;
296
0
        currGroupCount = 0;
297
0
        if (isGrouping) {
298
0
            segment.adjustOffset(actualGroupingString.length());
299
0
        } else {
300
0
            segment.adjustOffset(actualDecimalString.length());
301
0
        }
302
0
    }
303
0
304
0
    // End of main loop.
305
0
    // Back up if there was a trailing grouping separator.
306
0
    // Shift prev -> curr so we can check it as a final group.
307
0
    if (currGroupSepType != 2 && currGroupCount == 0) {
308
0
        maybeMore = true;
309
0
        segment.setOffset(currGroupOffset);
310
0
        currGroupOffset = prevGroupOffset;
311
0
        currGroupSepType = prevGroupSepType;
312
0
        currGroupCount = prevGroupCount;
313
0
        prevGroupOffset = -1;
314
0
        prevGroupSepType = 0;
315
0
        prevGroupCount = 1;
316
0
    }
317
0
318
0
    // Validate final grouping sizes.
319
0
    bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
320
0
    bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
321
0
    if (!requireGroupingMatch) {
322
0
        // The cases we need to handle here are lone digits.
323
0
        // Examples: "1,1"  "1,1,"  "1,1,1"  "1,1,1,"  ",1" (all parse as 1)
324
0
        // See more examples in numberformattestspecification.txt
325
0
        int32_t digitsToRemove = 0;
326
0
        if (!prevValidSecondary) {
327
0
            segment.setOffset(prevGroupOffset);
328
0
            digitsToRemove += prevGroupCount;
329
0
            digitsToRemove += currGroupCount;
330
0
        } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
331
0
            maybeMore = true;
332
0
            segment.setOffset(currGroupOffset);
333
0
            digitsToRemove += currGroupCount;
334
0
        }
335
0
        if (digitsToRemove != 0) {
336
0
            digitsConsumed.adjustMagnitude(-digitsToRemove);
337
0
            digitsConsumed.truncate();
338
0
        }
339
0
        prevValidSecondary = true;
340
0
        currValidPrimary = true;
341
0
    }
342
0
    if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
343
0
        // Grouping failure.
344
0
        digitsConsumed.bogus = true;
345
0
    }
346
0
347
0
    // Strings that start with a separator but have no digits,
348
0
    // or strings that failed a grouping size check.
349
0
    if (digitsConsumed.bogus) {
350
0
        maybeMore = maybeMore || (segment.length() == 0);
351
0
        segment.setOffset(initialOffset);
352
0
        return maybeMore;
353
0
    }
354
0
355
0
    // We passed all inspections. Start post-processing.
356
0
357
0
    // Adjust for fraction part.
358
0
    digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
359
0
360
0
    // Set the digits, either normal or exponent.
361
0
    if (exponentSign != 0 && segment.getOffset() != initialOffset) {
362
0
        bool overflow = false;
363
0
        if (digitsConsumed.fitsInLong()) {
364
0
            int64_t exponentLong = digitsConsumed.toLong(false);
365
0
            U_ASSERT(exponentLong >= 0);
366
0
            if (exponentLong <= INT32_MAX) {
367
0
                auto exponentInt = static_cast<int32_t>(exponentLong);
368
0
                if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
369
0
                    overflow = true;
370
0
                }
371
0
            } else {
372
0
                overflow = true;
373
0
            }
374
0
        } else {
375
0
            overflow = true;
376
0
        }
377
0
        if (overflow) {
378
0
            if (exponentSign == -1) {
379
0
                // Set to zero
380
0
                result.quantity.clear();
381
0
            } else {
382
0
                // Set to infinity
383
0
                result.quantity.bogus = true;
384
0
                result.flags |= FLAG_INFINITY;
385
0
            }
386
0
        }
387
0
    } else {
388
0
        result.quantity = digitsConsumed;
389
0
    }
390
0
391
0
    // Set other information into the result and return.
392
0
    if (!actualDecimalString.isBogus()) {
393
0
        result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
394
0
    }
395
0
    result.setCharsConsumed(segment);
396
0
    return segment.length() == 0 || maybeMore;
397
0
}
398
399
0
bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
400
0
    if (requireGroupingMatch) {
401
0
        if (sepType == -1) {
402
0
            // No such group (prevGroup before first shift).
403
0
            return true;
404
0
        } else if (sepType == 0) {
405
0
            // First group.
406
0
            if (isPrimary) {
407
0
                // No grouping separators is OK.
408
0
                return true;
409
0
            } else {
410
0
                return count != 0 && count <= grouping2;
411
0
            }
412
0
        } else if (sepType == 1) {
413
0
            // Middle group.
414
0
            if (isPrimary) {
415
0
                return count == grouping1;
416
0
            } else {
417
0
                return count == grouping2;
418
0
            }
419
0
        } else {
420
0
            U_ASSERT(sepType == 2);
421
0
            // After the decimal separator.
422
0
            return true;
423
0
        }
424
0
    } else {
425
0
        if (sepType == 1) {
426
0
            // #11230: don't accept middle groups with only 1 digit.
427
0
            return count != 1;
428
0
        } else {
429
0
            return true;
430
0
        }
431
0
    }
432
0
}
433
434
0
bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
435
0
    // The common case uses a static leadSet for efficiency.
436
0
    if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
437
0
        return segment.startsWith(*leadSet);
438
0
    }
439
0
    if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
440
0
        return true;
441
0
    }
442
0
    if (fLocalDigitStrings.isNull()) {
443
0
        return false;
444
0
    }
445
0
    for (int32_t i = 0; i < 10; i++) {
446
0
        if (segment.startsWith(fLocalDigitStrings[i])) {
447
0
            return true;
448
0
        }
449
0
    }
450
0
    return false;
451
0
}
452
453
0
// Returns the fixed label "<Decimal>" identifying this matcher.
UnicodeString DecimalMatcher::toString() const {
    return UnicodeString(u"<Decimal>");
}
456
457
458
#endif /* #if !UCONFIG_NO_FORMATTING */