/src/serenity/Userland/Libraries/LibLocale/NumberFormat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/CharacterTypes.h> |
8 | | #include <AK/Utf8View.h> |
9 | | #include <LibLocale/Locale.h> |
10 | | #include <LibLocale/NumberFormat.h> |
11 | | #include <LibUnicode/CharacterTypes.h> |
12 | | |
13 | | #if ENABLE_UNICODE_DATA |
14 | | # include <LibUnicode/UnicodeData.h> |
15 | | #endif |
16 | | |
17 | | namespace Locale { |
18 | | |
19 | | Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; } |
20 | | Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return {}; } |
21 | | Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return {}; } |
22 | | Vector<NumberFormat> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType) { return {}; } |
23 | | Vector<NumberFormat> __attribute__((weak)) get_unit_formats(StringView, StringView, Style) { return {}; } |
24 | | |
25 | | Optional<ReadonlySpan<u32>> __attribute__((weak)) get_digits_for_number_system(StringView) |
26 | | { |
27 | | // Fall back to "latn" digits when Unicode data generation is disabled. |
28 | | constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } }; |
29 | | return digits.span(); |
30 | | } |
31 | | |
32 | | String replace_digits_for_number_system(StringView system, StringView number) |
33 | 0 | { |
34 | 0 | auto digits = get_digits_for_number_system(system); |
35 | 0 | if (!digits.has_value()) |
36 | 0 | digits = get_digits_for_number_system("latn"sv); |
37 | 0 | VERIFY(digits.has_value()); |
38 | | |
39 | 0 | StringBuilder builder; |
40 | |
|
41 | 0 | for (auto ch : number) { |
42 | 0 | if (is_ascii_digit(ch)) { |
43 | 0 | u32 digit = digits->at(parse_ascii_digit(ch)); |
44 | 0 | builder.append_code_point(digit); |
45 | 0 | } else { |
46 | 0 | builder.append(ch); |
47 | 0 | } |
48 | 0 | } |
49 | |
|
50 | 0 | return MUST(builder.to_string()); |
51 | 0 | } |
52 | | |
53 | | #if ENABLE_UNICODE_DATA |
54 | | static u32 last_code_point(StringView string) |
55 | 0 | { |
56 | 0 | Utf8View utf8_string { string }; |
57 | 0 | u32 code_point = 0; |
58 | |
|
59 | 0 | for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it) |
60 | 0 | code_point = *it; |
61 | |
|
62 | 0 | return code_point; |
63 | 0 | } |
64 | | #endif |
65 | | |
66 | | // https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies |
67 | | Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern) |
68 | 0 | { |
69 | 0 | #if ENABLE_UNICODE_DATA |
70 | 0 | constexpr auto number_key = "{number}"sv; |
71 | 0 | constexpr auto currency_key = "{currency}"sv; |
72 | 0 | constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP) |
73 | |
|
74 | 0 | auto number_index = base_pattern.find(number_key); |
75 | 0 | VERIFY(number_index.has_value()); |
76 | | |
77 | 0 | auto currency_index = base_pattern.find(currency_key); |
78 | 0 | VERIFY(currency_index.has_value()); |
79 | | |
80 | 0 | Utf8View utf8_currency_display { currency_display }; |
81 | 0 | Optional<String> currency_key_with_spacing; |
82 | |
|
83 | 0 | if (*number_index < *currency_index) { |
84 | 0 | u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index)); |
85 | |
|
86 | 0 | if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) { |
87 | 0 | u32 first_currency_code_point = *utf8_currency_display.begin(); |
88 | |
|
89 | 0 | if (!Unicode::code_point_has_general_category(first_currency_code_point, Unicode::GeneralCategory::Symbol)) |
90 | 0 | currency_key_with_spacing = MUST(String::formatted("{}{}", spacing, currency_key)); |
91 | 0 | } |
92 | 0 | } else { |
93 | 0 | u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index)); |
94 | |
|
95 | 0 | if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) { |
96 | 0 | u32 last_currency_code_point = last_code_point(currency_display); |
97 | |
|
98 | 0 | if (!Unicode::code_point_has_general_category(last_currency_code_point, Unicode::GeneralCategory::Symbol)) |
99 | 0 | currency_key_with_spacing = MUST(String::formatted("{}{}", currency_key, spacing)); |
100 | 0 | } |
101 | 0 | } |
102 | | |
103 | 0 | if (currency_key_with_spacing.has_value()) |
104 | 0 | return MUST(MUST(String::from_utf8(base_pattern)).replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly)); |
105 | 0 | #endif |
106 | | |
107 | 0 | return {}; |
108 | 0 | } |
109 | | |
110 | | // https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing |
111 | | Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper) |
112 | 0 | { |
113 | 0 | #if ENABLE_UNICODE_DATA |
114 | 0 | auto range_pattern_with_spacing = [&]() { |
115 | 0 | return MUST(String::formatted(" {} ", range_separator)); |
116 | 0 | }; |
117 | |
|
118 | 0 | Utf8View utf8_range_separator { range_separator }; |
119 | 0 | Utf8View utf8_upper { upper }; |
120 | | |
121 | | // NOTE: Our implementation does the prescribed checks backwards for simplicity. |
122 | | |
123 | | // To determine whether to add spacing, the currently recommended heuristic is: |
124 | | // 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders. |
125 | 0 | for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) { |
126 | 0 | if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space)) |
127 | 0 | return {}; |
128 | 0 | } |
129 | | |
130 | | // 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit. |
131 | 0 | if (auto it = utf8_upper.begin(); it != utf8_upper.end()) { |
132 | 0 | if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number)) |
133 | 0 | return range_pattern_with_spacing(); |
134 | 0 | } |
135 | | |
136 | 0 | if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number)) |
137 | 0 | return range_pattern_with_spacing(); |
138 | 0 | #endif |
139 | | |
140 | 0 | return {}; |
141 | 0 | } |
142 | | |
143 | | } |