/src/serenity/Userland/Libraries/LibLocale/NumberFormat.cpp

Source (jump to first uncovered line)
/*
 * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/CharacterTypes.h>
#include <AK/Utf8View.h>
#include <LibLocale/Locale.h>
#include <LibLocale/NumberFormat.h>
#include <LibUnicode/CharacterTypes.h>

#if ENABLE_UNICODE_DATA
#    include <LibUnicode/UnicodeData.h>
#endif

namespace Locale {

// Weak default definitions of the locale number-data accessors. Every one of them ignores its arguments and
// reports "no data" (an empty Optional or an empty Vector). Because the symbols are weak, a strong definition
// of the same function linked into the program takes precedence over these.
// NOTE(review): presumably the strong definitions come from the generated Unicode/locale data - confirm.
Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol)
{
    return {};
}

Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView)
{
    return {};
}

Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType)
{
    return {};
}

Vector<NumberFormat> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType)
{
    return {};
}

Vector<NumberFormat> __attribute__((weak)) get_unit_formats(StringView, StringView, Style)
{
    return {};
}

// Weak default definition: ignores the requested number system and always yields the ten "latn" digit code
// points U+0030..U+0039, ordered so that index N holds the code point for digit N.
Optional<ReadonlySpan<u32>> __attribute__((weak)) get_digits_for_number_system(StringView)
{
    // Fall back to "latn" digits when Unicode data generation is disabled.
    //
    // The table must have static storage duration: the span handed back to the caller only refers to these
    // elements, so a plain function-local array would be destroyed on return and leave the span dangling.
    static constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } };
    return digits.span();
}

// Produces a copy of `number` in which each ASCII digit is swapped for the corresponding digit code point of
// the number system `system`. All other bytes are copied through unchanged. When `system` has no digit table,
// the "latn" table is used instead; that lookup is required to succeed.
String replace_digits_for_number_system(StringView system, StringView number)
{
    auto digit_table = get_digits_for_number_system(system);
    if (!digit_table.has_value())
        digit_table = get_digits_for_number_system("latn"sv);
    VERIFY(digit_table.has_value());

    StringBuilder output;

    for (auto byte : number) {
        // Anything that is not an ASCII digit is passed along untouched.
        if (!is_ascii_digit(byte)) {
            output.append(byte);
            continue;
        }

        // The digit's numeric value doubles as its index into the table.
        u32 replacement = digit_table->at(parse_ascii_digit(byte));
        output.append_code_point(replacement);
    }

    return MUST(output.to_string());
}

#if ENABLE_UNICODE_DATA
// Iterates `string` as UTF-8 and yields the final code point produced; an empty string yields 0.
static u32 last_code_point(StringView string)
{
    u32 most_recent = 0;

    // Walk the code points front to back, remembering only the latest one seen.
    for (auto current : Utf8View { string })
        most_recent = current;

    return most_recent;
}
#endif

// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
//
// Decides whether a No-Break Space has to separate the "{currency}" and "{number}" placeholders of `base_pattern`
// once `currency_display` is substituted in. Both placeholders must be present in `base_pattern` (this is VERIFY'd).
//
// Spacing is inserted on the side of "{currency}" that faces "{number}" when both of the following hold:
//   - the pattern text preceding whichever placeholder comes second does not end in a Separator, and
//   - the code point of `currency_display` on that side (its first one if the number comes first, its last one
//     otherwise) is not a Symbol.
//
// Returns the pattern with the first "{currency}" occurrence replaced by its spaced form, or an empty Optional
// when no spacing is inserted. Without ENABLE_UNICODE_DATA the result is always an empty Optional.
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
{
#if ENABLE_UNICODE_DATA
    constexpr auto number_key = "{number}"sv;
    constexpr auto currency_key = "{currency}"sv;
    constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)

    auto number_index = base_pattern.find(number_key);
    VERIFY(number_index.has_value());

    auto currency_index = base_pattern.find(currency_key);
    VERIFY(currency_index.has_value());

    Utf8View utf8_currency_display { currency_display };
    Optional<String> currency_key_with_spacing;

    if (*number_index < *currency_index) {
        // "{number}...{currency}": the currency's leading code point is the one adjacent to the number.
        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));

        if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
            // Never dereference the begin iterator of an empty view. An empty display string is treated as code
            // point 0, which is what last_code_point() yields for empty input in the branch below.
            auto first_currency_it = utf8_currency_display.begin();
            u32 first_currency_code_point = (first_currency_it != utf8_currency_display.end()) ? *first_currency_it : 0;

            if (!Unicode::code_point_has_general_category(first_currency_code_point, Unicode::GeneralCategory::Symbol))
                currency_key_with_spacing = MUST(String::formatted("{}{}", spacing, currency_key));
        }
    } else {
        // "{currency}...{number}": the currency's trailing code point is the one adjacent to the number.
        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));

        if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
            u32 last_currency_code_point = last_code_point(currency_display);

            if (!Unicode::code_point_has_general_category(last_currency_code_point, Unicode::GeneralCategory::Symbol))
                currency_key_with_spacing = MUST(String::formatted("{}{}", currency_key, spacing));
        }
    }

    if (currency_key_with_spacing.has_value())
        return MUST(MUST(String::from_utf8(base_pattern)).replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly));
#endif

    return {};
}

// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
//
// Returns `range_separator` wrapped in a single space on each side when the heuristic below asks for spacing,
// and an empty Optional otherwise. Without ENABLE_UNICODE_DATA the result is always an empty Optional.
Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper)
{
#if ENABLE_UNICODE_DATA
    // NOTE: Our implementation does the prescribed checks backwards for simplicity.

    // To determine whether to add spacing, the currently recommended heuristic is:
    // 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
    Utf8View separator_view { range_separator };

    for (auto code_point : separator_view) {
        if (Unicode::code_point_has_property(code_point, Unicode::Property::White_Space))
            return {};
    }

    auto is_decimal_digit = [](u32 code_point) {
        return Unicode::code_point_has_general_category(code_point, Unicode::GeneralCategory::Decimal_Number);
    };

    // 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
    Utf8View upper_view { upper };
    auto upper_first = upper_view.begin();

    // An empty upper string has no leading code point to inspect, so only the lower string decides in that case.
    bool upper_begins_with_non_digit = (upper_first != upper_view.end()) && !is_decimal_digit(*upper_first);

    if (upper_begins_with_non_digit || !is_decimal_digit(last_code_point(lower)))
        return MUST(String::formatted(" {} ", range_separator));
#endif

    return {};
}

}

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
3		*
4		* SPDX-License-Identifier: BSD-2-Clause
5		*/
6
7		#include <AK/CharacterTypes.h>
8		#include <AK/Utf8View.h>
9		#include <LibLocale/Locale.h>
10		#include <LibLocale/NumberFormat.h>
11		#include <LibUnicode/CharacterTypes.h>
12
13		#if ENABLE_UNICODE_DATA
14		# include <LibUnicode/UnicodeData.h>
15		#endif
16
17		namespace Locale {
18
19		Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
20		Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return {}; }
21		Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return {}; }
22		Vector<NumberFormat> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType) { return {}; }
23		Vector<NumberFormat> __attribute__((weak)) get_unit_formats(StringView, StringView, Style) { return {}; }
24
25		Optional<ReadonlySpan<u32>> __attribute__((weak)) get_digits_for_number_system(StringView)
26		{
27		// Fall back to "latn" digits when Unicode data generation is disabled.
28		constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } };
29		return digits.span();
30		}
31
32		String replace_digits_for_number_system(StringView system, StringView number)
33	0	{
34	0	auto digits = get_digits_for_number_system(system);
35	0	if (!digits.has_value())
36	0	digits = get_digits_for_number_system("latn"sv);
37	0	VERIFY(digits.has_value());
38
39	0	StringBuilder builder;
40
41	0	for (auto ch : number) {
42	0	if (is_ascii_digit(ch)) {
43	0	u32 digit = digits->at(parse_ascii_digit(ch));
44	0	builder.append_code_point(digit);
45	0	} else {
46	0	builder.append(ch);
47	0	}
48	0	}
49
50	0	return MUST(builder.to_string());
51	0	}
52
53		#if ENABLE_UNICODE_DATA
54		static u32 last_code_point(StringView string)
55	0	{
56	0	Utf8View utf8_string { string };
57	0	u32 code_point = 0;
58
59	0	for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
60	0	code_point = *it;
61
62	0	return code_point;
63	0	}
64		#endif
65
66		// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
67		Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
68	0	{
69	0	#if ENABLE_UNICODE_DATA
70	0	constexpr auto number_key = "{number}"sv;
71	0	constexpr auto currency_key = "{currency}"sv;
72	0	constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
73
74	0	auto number_index = base_pattern.find(number_key);
75	0	VERIFY(number_index.has_value());
76
77	0	auto currency_index = base_pattern.find(currency_key);
78	0	VERIFY(currency_index.has_value());
79
80	0	Utf8View utf8_currency_display { currency_display };
81	0	Optional<String> currency_key_with_spacing;
82
83	0	if (number_index < currency_index) {
84	0	u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
85
86	0	if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
87	0	u32 first_currency_code_point = *utf8_currency_display.begin();
88
89	0	if (!Unicode::code_point_has_general_category(first_currency_code_point, Unicode::GeneralCategory::Symbol))
90	0	currency_key_with_spacing = MUST(String::formatted("{}{}", spacing, currency_key));
91	0	}
92	0	} else {
93	0	u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
94
95	0	if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
96	0	u32 last_currency_code_point = last_code_point(currency_display);
97
98	0	if (!Unicode::code_point_has_general_category(last_currency_code_point, Unicode::GeneralCategory::Symbol))
99	0	currency_key_with_spacing = MUST(String::formatted("{}{}", currency_key, spacing));
100	0	}
101	0	}
102
103	0	if (currency_key_with_spacing.has_value())
104	0	return MUST(MUST(String::from_utf8(base_pattern)).replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly));
105	0	#endif
106
107	0	return {};
108	0	}
109
110		// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
111		Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper)
112	0	{
113	0	#if ENABLE_UNICODE_DATA
114	0	auto range_pattern_with_spacing = [&]() {
115	0	return MUST(String::formatted(" {} ", range_separator));
116	0	};
117
118	0	Utf8View utf8_range_separator { range_separator };
119	0	Utf8View utf8_upper { upper };
120
121		// NOTE: Our implementation does the prescribed checks backwards for simplicity.
122
123		// To determine whether to add spacing, the currently recommended heuristic is:
124		// 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
125	0	for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
126	0	if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space))
127	0	return {};
128	0	}
129
130		// 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
131	0	if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
132	0	if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number))
133	0	return range_pattern_with_spacing();
134	0	}
135
136	0	if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number))
137	0	return range_pattern_with_spacing();
138	0	#endif
139
140	0	return {};
141	0	}
142
143		}

Coverage Report

Created: 2025-09-05 06:52