/src/icu/source/i18n/numparse_scientific.cpp

Source (jump to first uncovered line)
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"
#include "string_segment.h"

using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;


namespace {

/**
 * Accessor for the shared static set looked up under unisets::MINUS_SIGN.
 *
 * @return Reference to the set returned by unisets::get(); the pointer is
 *         dereferenced without a null check.
 */
inline const UnicodeSet& minusSignSet() {
    const UnicodeSet* const minusSet = unisets::get(unisets::MINUS_SIGN);
    return *minusSet;
}

/**
 * Accessor for the shared static set looked up under unisets::PLUS_SIGN.
 *
 * @return Reference to the set returned by unisets::get(); the pointer is
 *         dereferenced without a null check.
 */
inline const UnicodeSet& plusSignSet() {
    const UnicodeSet* const plusSet = unisets::get(unisets::PLUS_SIGN);
    return *plusSet;
}

} // namespace


/**
 * Builds a matcher for the exponent part of a number.
 *
 * The exponent separator is taken from the symbols' kExponentialSymbol, the
 * exponent digits are parsed by a decimal matcher restricted to integers with
 * grouping disabled, and ignorables are matched with the strict-ignorables flag.
 *
 * The symbols' minus and plus signs are stored as "custom" signs only when they
 * are not already contained in the shared MINUS_SIGN / PLUS_SIGN sets; otherwise
 * the corresponding custom-sign field is set to bogus.
 *
 * @param dfs     Symbols providing the exponent separator and the sign strings.
 * @param grouper Passed through to the exponent digit matcher.
 */
ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {

    // Minus sign: remember it only if the shared set does not already cover it.
    const UnicodeString& symbolsMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    if (!minusSignSet().contains(symbolsMinus)) {
        fCustomMinusSign = symbolsMinus;
    } else {
        fCustomMinusSign.setToBogus();
    }

    // Plus sign: same rule as for the minus sign.
    const UnicodeString& symbolsPlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
    if (!plusSignSet().contains(symbolsPlus)) {
        fCustomPlusSign = symbolsPlus;
    } else {
        fCustomPlusSign.setToBogus();
    }
}

/**
 * Tries to consume an exponent ("E", optional sign, digits) at the start of the segment.
 *
 * Sequence: exponent separator, optional ignorables, optional sign (standard set
 * or custom sign string), optional ignorables, then digits handled by the
 * exponent digit matcher.
 *
 * Side effects: on a successful digit match the segment offset is left after the
 * consumed text and FLAG_HAS_EXPONENT is set on the result. In every other case
 * in which the offset was advanced, it is restored to its value at entry.
 *
 * @param segment The input being parsed; its offset may be advanced.
 * @param result  The number parsed so far; must already contain a number.
 * @param status  Error code passed through to the nested matchers.
 * @return false if no number has been seen yet, if an exponent was already
 *         parsed, or if the segment does not begin with (a prefix of) the
 *         exponent separator; true if the input ends part-way through the
 *         exponent syntax; otherwise the value returned by the exponent digit
 *         matcher.
 */
bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // An exponent is only accepted after a mantissa, and only once per string.
    if (!result.seenNumber() || (result.flags & FLAG_HAS_EXPONENT) != 0) {
        return false;
    }

    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
    const int32_t startOffset = segment.getOffset();
    const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);

    if (separatorOverlap != fExponentSeparatorString.length()) {
        // Not a full separator match. It is still a partial match when the whole
        // remaining segment is a prefix of the separator; otherwise no match.
        return separatorOverlap == segment.length();
    }

    // Full exponent separator match, but nothing follows it in the segment.
    if (segment.length() == separatorOverlap) {
        return true;
    }
    segment.adjustOffset(separatorOverlap);

    // Shared exit for "ran out of input / partial match": undo all consumption
    // done so far and report true.
    const auto restoreAndReportPartial = [&segment, startOffset]() -> bool {
        segment.setOffset(startOffset);
        return true;
    };

    // Ignorables are allowed between the separator and the sign.
    // Note: the ignorables matcher should not touch the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        return restoreAndReportPartial();
    }

    // Optional sign. The standard sign sets are checked first, then the custom
    // sign strings (minus before plus).
    int8_t exponentSign = 1;
    if (segment.startsWith(minusSignSet())) {
        exponentSign = -1;
        segment.adjustOffsetByCodePoint();
    } else if (segment.startsWith(plusSignSet())) {
        segment.adjustOffsetByCodePoint();
    } else {
        const UnicodeString* customSign = nullptr;
        if (segment.startsWith(fCustomMinusSign)) {
            customSign = &fCustomMinusSign;
        } else if (segment.startsWith(fCustomPlusSign)) {
            customSign = &fCustomPlusSign;
        }

        if (customSign != nullptr) {
            // startsWith() may succeed even though the full sign string is not
            // present; that situation is treated as a partial custom sign match.
            const int32_t signOverlap = segment.getCommonPrefixLength(*customSign);
            if (signOverlap != customSign->length()) {
                return restoreAndReportPartial();
            }
            if (customSign == &fCustomMinusSign) {
                exponentSign = -1;
            }
            segment.adjustOffset(signOverlap);
        }
    }

    // Nothing left after the (optional) sign.
    if (segment.length() == 0) {
        return restoreAndReportPartial();
    }

    // Ignorables are allowed between the sign and the digits.
    // Note: the ignorables matcher should not touch the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        return restoreAndReportPartial();
    }

    // We are supposed to accept E0 after NaN, so result.quantity is temporarily
    // marked non-bogus while the digit matcher runs, then put back.
    const bool savedBogus = result.quantity.bogus;
    result.quantity.bogus = false;
    const int32_t digitsStart = segment.getOffset();
    const bool digitsResult = fExponentMatcher.match(segment, result, exponentSign, status);
    result.quantity.bogus = savedBogus;

    if (segment.getOffset() == digitsStart) {
        // No exponent digits were consumed: give back the separator and sign.
        segment.setOffset(startOffset);
    } else {
        // At least one exponent digit was consumed.
        result.flags |= FLAG_HAS_EXPONENT;
    }
    return digitsResult;
}

/**
 * Quick pre-check used before match(): reports whether the segment starts with
 * the exponent separator string, as decided by StringSegment::startsWith().
 *
 * @param segment The input to inspect; it is not modified.
 * @return The result of segment.startsWith(fExponentSeparatorString).
 */
bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    const bool beginsWithSeparator = segment.startsWith(fExponentSeparatorString);
    return beginsWithSeparator;
}

/**
 * Returns a fixed, human-readable label identifying this matcher.
 *
 * @return The string "<Scientific>".
 */
UnicodeString ScientificMatcher::toString() const {
    return UnicodeString(u"<Scientific>");
}


#endif /* #if !UCONFIG_NO_FORMATTING */

Line	Count	Source (jump to first uncovered line)
1		// © 2018 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3
4		#include "unicode/utypes.h"
5
6		#if !UCONFIG_NO_FORMATTING
7
8		// Allow implicit conversion from char16_t* to UnicodeString for this file:
9		// Helpful in toString methods and elsewhere.
10		#define UNISTR_FROM_STRING_EXPLICIT
11
12		#include "numparse_types.h"
13		#include "numparse_scientific.h"
14		#include "static_unicode_sets.h"
15		#include "string_segment.h"
16
17		using namespace icu;
18		using namespace icu::numparse;
19		using namespace icu::numparse::impl;
20
21
22		namespace {
23
24	0	inline const UnicodeSet& minusSignSet() {
25	0	return *unisets::get(unisets::MINUS_SIGN);
26	0	}
27
28	0	inline const UnicodeSet& plusSignSet() {
29	0	return *unisets::get(unisets::PLUS_SIGN);
30	0	}
31
32		} // namespace
33
34
35		ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
36	0	: fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
37	0	fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY \| PARSE_FLAG_GROUPING_DISABLED),
38	0	fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
39
40	0	const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
41	0	if (minusSignSet().contains(minusSign)) {
42	0	fCustomMinusSign.setToBogus();
43	0	} else {
44	0	fCustomMinusSign = minusSign;
45	0	}
46
47	0	const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
48	0	if (plusSignSet().contains(plusSign)) {
49	0	fCustomPlusSign.setToBogus();
50	0	} else {
51	0	fCustomPlusSign = plusSign;
52	0	}
53	0	}
54
55	0	bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
56		// Only accept scientific notation after the mantissa.
57	0	if (!result.seenNumber()) {
58	0	return false;
59	0	}
60
61		// Only accept one exponent per string.
62	0	if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
63	0	return false;
64	0	}
65
66		// First match the scientific separator, and then match another number after it.
67		// NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
68	0	int32_t initialOffset = segment.getOffset();
69	0	int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
70	0	if (overlap == fExponentSeparatorString.length()) {
71		// Full exponent separator match.
72
73		// First attempt to get a code point, returning true if we can't get one.
74	0	if (segment.length() == overlap) {
75	0	return true;
76	0	}
77	0	segment.adjustOffset(overlap);
78
79		// Allow ignorables before the sign.
80		// Note: call site is guarded by the segment.length() check above.
81		// Note: the ignorables matcher should not touch the result.
82	0	fIgnorablesMatcher.match(segment, result, status);
83	0	if (segment.length() == 0) {
84	0	segment.setOffset(initialOffset);
85	0	return true;
86	0	}
87
88		// Allow a sign, and then try to match digits.
89	0	int8_t exponentSign = 1;
90	0	if (segment.startsWith(minusSignSet())) {
91	0	exponentSign = -1;
92	0	segment.adjustOffsetByCodePoint();
93	0	} else if (segment.startsWith(plusSignSet())) {
94	0	segment.adjustOffsetByCodePoint();
95	0	} else if (segment.startsWith(fCustomMinusSign)) {
96	0	overlap = segment.getCommonPrefixLength(fCustomMinusSign);
97	0	if (overlap != fCustomMinusSign.length()) {
98		// Partial custom sign match
99	0	segment.setOffset(initialOffset);
100	0	return true;
101	0	}
102	0	exponentSign = -1;
103	0	segment.adjustOffset(overlap);
104	0	} else if (segment.startsWith(fCustomPlusSign)) {
105	0	overlap = segment.getCommonPrefixLength(fCustomPlusSign);
106	0	if (overlap != fCustomPlusSign.length()) {
107		// Partial custom sign match
108	0	segment.setOffset(initialOffset);
109	0	return true;
110	0	}
111	0	segment.adjustOffset(overlap);
112	0	}
113
114		// Return true if the segment is empty.
115	0	if (segment.length() == 0) {
116	0	segment.setOffset(initialOffset);
117	0	return true;
118	0	}
119
120		// Allow ignorables after the sign.
121		// Note: call site is guarded by the segment.length() check above.
122		// Note: the ignorables matcher should not touch the result.
123	0	fIgnorablesMatcher.match(segment, result, status);
124	0	if (segment.length() == 0) {
125	0	segment.setOffset(initialOffset);
126	0	return true;
127	0	}
128
129		// We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
130	0	bool wasBogus = result.quantity.bogus;
131	0	result.quantity.bogus = false;
132	0	int digitsOffset = segment.getOffset();
133	0	bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
134	0	result.quantity.bogus = wasBogus;
135
136	0	if (segment.getOffset() != digitsOffset) {
137		// At least one exponent digit was matched.
138	0	result.flags \|= FLAG_HAS_EXPONENT;
139	0	} else {
140		// No exponent digits were matched
141	0	segment.setOffset(initialOffset);
142	0	}
143	0	return digitsReturnValue;
144
145	0	} else if (overlap == segment.length()) {
146		// Partial exponent separator match
147	0	return true;
148	0	}
149
150		// No match
151	0	return false;
152	0	}
153
154	0	bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
155	0	return segment.startsWith(fExponentSeparatorString);
156	0	}
157
158	0	UnicodeString ScientificMatcher::toString() const {
159	0	return u"<Scientific>";
160	0	}
161
162
163		#endif /* #if !UCONFIG_NO_FORMATTING */

Coverage Report

Created: 2025-01-28 06:38