/src/serenity/Userland/Libraries/LibUnicode/UnicodeUtils.cpp

Source (jump to first uncovered line)
/*
 * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Platform.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/Segmentation.h>
#include <LibUnicode/UnicodeUtils.h>

#if ENABLE_UNICODE_DATA
#    include <LibUnicode/UnicodeData.h>
#endif

// For details on the algorithms used here, see Section 3.13 Default Case Algorithms
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf

namespace Unicode::Detail {

#if ENABLE_UNICODE_DATA

static bool is_after_uppercase_i(Utf8View const& string, size_t index)
{
    // After_I context: an uppercase I appears somewhere before C (the code point starting at byte offset
    // `index`), and no code point of canonical combining class 0 or 230 (Above) sits between that I and C.
    //
    // FIXME: Would be better if Utf8View supported reverse iteration. Until then, the whole prefix is
    //        scanned front to back while tracking whether the most recent 'I' is still unblocked.
    auto prefix = string.substring_view(0, index);
    bool uppercase_i_is_pending = false;

    for (auto preceding_code_point : prefix) {
        if (preceding_code_point == 'I') {
            uppercase_i_is_pending = true;
        } else {
            auto ccc = canonical_combining_class(preceding_code_point);

            // A starter (class 0) or an Above mark (class 230) cancels any 'I' seen so far.
            if (ccc == 0 || ccc == 230)
                uppercase_i_is_pending = false;
        }
    }

    return uppercase_i_is_pending;
}

static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index)
{
    // After_Soft_Dotted context: a Soft_Dotted code point appears before C (the code point starting at byte
    // offset `index`), with no code point of canonical combining class 0 or 230 (Above) in between.
    //
    // FIXME: Would be better if Utf8View supported reverse iteration. Until then, the whole prefix is
    //        scanned front to back while tracking whether the most recent Soft_Dotted code point is
    //        still unblocked.
    auto prefix = string.substring_view(0, index);
    bool soft_dotted_is_pending = false;

    for (auto preceding_code_point : prefix) {
        // The Soft_Dotted test comes first so that such a code point is never treated as a blocker.
        if (code_point_has_property(preceding_code_point, Property::Soft_Dotted)) {
            soft_dotted_is_pending = true;
        } else {
            auto ccc = canonical_combining_class(preceding_code_point);

            if (ccc == 0 || ccc == 230)
                soft_dotted_is_pending = false;
        }
    }

    return soft_dotted_is_pending;
}

static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
{
    // Final_Sigma context for the code point C occupying bytes [index, index + byte_length):
    // C is preceded by a sequence consisting of a cased letter and then zero or more case-ignorable
    // characters, and C is not followed by a sequence consisting of zero or more case-ignorable
    // characters and then a cased letter.

    // Backward half of the rule. Case-ignorable code points are transparent; every other code point
    // decides whether a cased letter is currently "in effect" right before C.
    bool preceded_by_cased_letter = false;

    for (auto preceding_code_point : string.substring_view(0, index)) {
        if (code_point_has_property(preceding_code_point, Property::Case_Ignorable))
            continue;

        preceded_by_cased_letter = code_point_has_property(preceding_code_point, Property::Cased);
    }

    if (!preceded_by_cased_letter)
        return false;

    // Forward half of the rule. With nothing after C, no cased letter can follow.
    auto const end_of_code_point = index + byte_length;
    if (end_of_code_point >= string.byte_length())
        return true;

    for (auto following_code_point : string.substring_view(end_of_code_point)) {
        if (code_point_has_property(following_code_point, Property::Case_Ignorable))
            continue;

        // The first non-ignorable code point settles the question: C is final unless it is cased.
        return !code_point_has_property(following_code_point, Property::Cased);
    }

    return true;
}

static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
{
    // More_Above context for the code point C occupying bytes [index, index + byte_length):
    // C is followed by a character of combining class 230 (Above) with no intervening character of
    // combining class 0 or 230 (Above).
    auto const end_of_code_point = index + byte_length;
    if (end_of_code_point >= string.byte_length())
        return false;

    for (auto following_code_point : string.substring_view(end_of_code_point)) {
        u32 ccc = canonical_combining_class(following_code_point);

        // The first class-0 or class-230 code point ends the search; only the latter is a match.
        if (ccc == 0 || ccc == 230)
            return ccc == 230;
    }

    return false;
}

static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t index, size_t byte_length)
{
    // Before_Dot context for the code point C occupying bytes [index, index + byte_length):
    // C is followed by combining dot above (U+0307). Any sequence of characters with a combining class
    // that is neither 0 nor 230 may intervene between C and the combining dot above.
    auto const end_of_code_point = index + byte_length;
    if (end_of_code_point >= string.byte_length())
        return false;

    for (auto following_code_point : string.substring_view(end_of_code_point)) {
        // U+0307 is tested before the blocker check below, so it is always accepted when reached.
        if (following_code_point == 0x307)
            return true;

        u32 ccc = canonical_combining_class(following_code_point);
        bool const blocks_the_search = (ccc == 0) || (ccc == 230);

        if (blocks_the_search)
            return false;
    }

    return false;
}

// Looks up the special casing entry that applies to `code_point`, which occupies the bytes
// [index, index + byte_length) of `string`. Entries are considered in the order special_case_mapping()
// yields them; the first entry whose locale and context condition both match is returned. An empty
// Optional means no entry applies.
static Optional<SpecialCasing const&> find_matching_special_case(u32 code_point, Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length)
{
    // A missing locale, or one locale_from_string() does not recognize, is treated as Locale::None.
    auto requested_locale = Locale::None;

    if (locale.has_value()) {
        auto parsed_locale = locale_from_string(*locale);

        if (parsed_locale.has_value())
            requested_locale = *parsed_locale;
    }

    // Evaluates the context condition attached to a special casing entry at this position.
    auto condition_is_satisfied = [&](Condition condition) {
        switch (condition) {
        case Condition::None:
            return true;
        case Condition::AfterI:
            return is_after_uppercase_i(string, index);
        case Condition::AfterSoftDotted:
            return is_after_soft_dotted_code_point(string, index);
        case Condition::FinalSigma:
            return is_final_code_point(string, index, byte_length);
        case Condition::MoreAbove:
            return is_followed_by_combining_class_above(string, index, byte_length);
        case Condition::NotBeforeDot:
            return !is_followed_by_combining_dot_above(string, index, byte_length);
        }

        return false;
    };

    auto candidates = special_case_mapping(code_point);

    for (auto const& candidate : candidates) {
        // Locale-independent entries always qualify; locale-specific ones must match the request.
        bool const locale_applies = (candidate.locale == Locale::None) || (candidate.locale == requested_locale);
        if (!locale_applies)
            continue;

        if (condition_is_satisfied(candidate.condition))
            return candidate;
    }

    return {};
}

// Returns the first case folding entry for `code_point` whose status equals any of the `StatusFilter`
// template arguments, or an empty Optional if case_folding_mapping() yields no such entry.
template<CaseFoldingStatus... StatusFilter>
static Optional<CaseFolding const&> find_matching_case_folding(u32 code_point)
{
    auto candidates = case_folding_mapping(code_point);

    for (auto const& candidate : candidates) {
        bool const status_is_accepted = ((candidate.status == StatusFilter) || ...);

        if (status_is_accepted)
            return candidate;
    }

    return {};
}

#endif

// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
//
// Appends the lowercase form of every code point in `code_points` to `builder`. A matching special
// casing entry (selected using `locale` and the surrounding text) takes precedence over the simple
// to_unicode_lowercase() mapping. Fails if appending to `builder` fails, or unconditionally when
// Unicode data is disabled.
ErrorOr<void> build_lowercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
{
#if ENABLE_UNICODE_DATA
    // Byte offset of the current code point within `code_points`; the context checks need it.
    size_t byte_offset = 0;
    auto it = code_points.begin();

    while (it != code_points.end()) {
        u32 code_point = *it;
        size_t length_in_bytes = it.underlying_code_point_length_in_bytes();

        auto special_casing = find_matching_special_case(code_point, code_points, locale, byte_offset, length_in_bytes);

        if (special_casing.has_value()) {
            // The special mapping may expand to several code points, or to none at all.
            for (size_t i = 0; i < special_casing->lowercase_mapping_size; ++i)
                TRY(builder.try_append_code_point(special_casing->lowercase_mapping[i]));
        } else {
            TRY(builder.try_append_code_point(to_unicode_lowercase(code_point)));
        }

        byte_offset += length_in_bytes;
        ++it;
    }

    return {};
#else
    return Error::from_string_literal("Unicode data has been disabled");
#endif
}

// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
//
// Appends the uppercase form of every code point in `code_points` to `builder`. A matching special
// casing entry (selected using `locale` and the surrounding text) takes precedence over the simple
// to_unicode_uppercase() mapping. Fails if appending to `builder` fails, or unconditionally when
// Unicode data is disabled.
ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
{
#if ENABLE_UNICODE_DATA
    // Byte offset of the current code point within `code_points`; the context checks need it.
    size_t byte_offset = 0;
    auto it = code_points.begin();

    while (it != code_points.end()) {
        u32 code_point = *it;
        size_t length_in_bytes = it.underlying_code_point_length_in_bytes();

        auto special_casing = find_matching_special_case(code_point, code_points, locale, byte_offset, length_in_bytes);

        if (special_casing.has_value()) {
            // The special mapping may expand to several code points, or to none at all.
            for (size_t i = 0; i < special_casing->uppercase_mapping_size; ++i)
                TRY(builder.try_append_code_point(special_casing->uppercase_mapping[i]));
        } else {
            TRY(builder.try_append_code_point(to_unicode_uppercase(code_point)));
        }

        byte_offset += length_in_bytes;
        ++it;
    }

    return {};
#else
    return Error::from_string_literal("Unicode data has been disabled");
#endif
}

// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
//
// Appends the titlecase form of `code_points` to `builder`. Within each word segment, code points
// before the first cased code point are copied verbatim, that first cased code point is titlecased,
// and the rest of the segment is either lowercased or copied verbatim depending on
// `trailing_code_point_transformation`. Fails if appending to `builder` fails, or unconditionally
// when Unicode data is disabled.
ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale, [[maybe_unused]] TrailingCodePointTransformation trailing_code_point_transformation)
{
#if ENABLE_UNICODE_DATA
    // toTitlecase(X): Find the word boundaries in X according to Unicode Standard Annex #29,
    // “Unicode Text Segmentation.” For each word boundary, find the first cased character F following
    // the word boundary. If F exists, map F to Titlecase_Mapping(F); then map all characters C between
    // F and the following word boundary to Lowercase_Mapping(C).

    // Finds the first Cased code point in the byte range [boundary, next_boundary), if there is one.
    auto first_cased_code_point_after_boundary = [&](auto boundary, auto next_boundary) -> Optional<Utf8CodePointIterator> {
        auto it = code_points.iterator_at_byte_offset_without_validation(boundary);
        auto end = code_points.iterator_at_byte_offset_without_validation(next_boundary);

        for (; it != end; ++it) {
            if (code_point_has_property(*it, Property::Cased))
                return it;
        }

        return {};
    };

    // Appends the titlecase mapping of one code point, preferring a matching special casing entry.
    auto append_code_point_as_titlecase = [&](auto code_point, auto code_point_offset, auto code_point_length) -> ErrorOr<void> {
        auto special_casing = find_matching_special_case(code_point, code_points, locale, code_point_offset, code_point_length);
        if (!special_casing.has_value()) {
            TRY(builder.try_append_code_point(to_unicode_titlecase(code_point)));
            return {};
        }

        for (size_t i = 0; i < special_casing->titlecase_mapping_size; ++i)
            TRY(builder.try_append_code_point(special_casing->titlecase_mapping[i]));
        return {};
    };

    // Appends the lowercase mapping of every code point in the byte range [range_start, range_end).
    // The special casing lookup is deliberately given the full `code_points` view and full-string byte
    // offsets: context conditions such as Final_Sigma or After_I inspect the text around a code point,
    // so evaluating them against an isolated substring would hide that context (e.g. the cased letter
    // preceding a word-final sigma).
    auto append_range_as_lowercase = [&](size_t range_start, size_t range_end) -> ErrorOr<void> {
        auto it = code_points.iterator_at_byte_offset_without_validation(range_start);
        auto end = code_points.iterator_at_byte_offset_without_validation(range_end);

        size_t code_point_offset = range_start;
        size_t code_point_length = 0;

        for (; it != end; ++it, code_point_offset += code_point_length) {
            u32 code_point = *it;
            code_point_length = it.underlying_code_point_length_in_bytes();

            auto special_casing = find_matching_special_case(code_point, code_points, locale, code_point_offset, code_point_length);
            if (!special_casing.has_value()) {
                TRY(builder.try_append_code_point(to_unicode_lowercase(code_point)));
                continue;
            }

            for (size_t i = 0; i < special_casing->lowercase_mapping_size; ++i)
                TRY(builder.try_append_code_point(special_casing->lowercase_mapping[i]));
        }

        return {};
    };

    size_t boundary = 0;

    while (true) {
        auto next_boundary = next_word_segmentation_boundary(code_points, boundary);
        if (!next_boundary.has_value())
            break;

        if (auto it = first_cased_code_point_after_boundary(boundary, *next_boundary); it.has_value()) {
            auto code_point = *it.value();
            auto code_point_offset = code_points.byte_offset_of(*it);
            auto code_point_length = it->underlying_code_point_length_in_bytes();

            // Everything in this segment ahead of the first cased code point is copied unchanged.
            auto caseless_code_points = code_points.substring_view(boundary, code_point_offset - boundary);
            TRY(builder.try_append(caseless_code_points.as_string()));

            TRY(append_code_point_as_titlecase(code_point, code_point_offset, code_point_length));
            boundary = code_point_offset + code_point_length;
        }

        // `boundary` now points just past the titlecased code point, or is unchanged if the segment
        // contains no cased code point at all.
        auto remaining_code_points = code_points.substring_view(boundary, *next_boundary - boundary);
        switch (trailing_code_point_transformation) {
        case TrailingCodePointTransformation::Lowercase:
            TRY(append_range_as_lowercase(boundary, *next_boundary));
            break;
        case TrailingCodePointTransformation::PreserveExisting:
            TRY(builder.try_append(remaining_code_points.as_string()));
            break;
        }

        boundary = *next_boundary;
    }

    return {};
#else
    return Error::from_string_literal("Unicode data has been disabled");
#endif
}

// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
//
// Appends the case folding of every code point in `code_points` to `builder`, in order. Fails only if
// appending to `builder` fails.
ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder)
{
    // toCasefold(X): Map each character C in X to Case_Folding(C).
    for (auto it = code_points.begin(); it != code_points.end(); ++it) {
        // Kept in a named local because casefold_code_point() takes its argument by reference and may
        // return a view onto it.
        u32 code_point = *it;

        auto folded_code_points = casefold_code_point(code_point);
        TRY(builder.try_append(folded_code_points));
    }

    return {};
}

// https://www.unicode.org/reports/tr44/#CaseFolding.txt
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
//
// Returns a view over the case folding of `code_point`. When no folding entry is found (or Unicode
// data is disabled), the returned view points at the caller's `code_point` object itself, so that
// object must stay alive for as long as the view is used.
Utf32View casefold_code_point(u32 const& code_point)
{
#if ENABLE_UNICODE_DATA
    // Case_Folding(C) uses the mappings with the status field value “C” or “F” in the data file
    // CaseFolding.txt in the Unicode Character Database.
    using enum CaseFoldingStatus;

    auto folding = find_matching_case_folding<Common, Full>(code_point);

    if (folding.has_value())
        return Utf32View { folding->mapping, folding->mapping_size };
#endif

    // The case foldings are omitted in the data file if they are the same as the code point itself,
    // so a code point without an entry folds to itself.
    return Utf32View { &code_point, 1 };
}

}

Coverage Report

Created: 2025-03-04 07:22

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
3		*
4		* SPDX-License-Identifier: BSD-2-Clause
5		*/
6
7		#include <AK/Platform.h>
8		#include <AK/String.h>
9		#include <AK/StringBuilder.h>
10		#include <AK/Types.h>
11		#include <LibUnicode/CharacterTypes.h>
12		#include <LibUnicode/Segmentation.h>
13		#include <LibUnicode/UnicodeUtils.h>
14
15		#if ENABLE_UNICODE_DATA
16		# include <LibUnicode/UnicodeData.h>
17		#endif
18
19		// For details on the algorithms used here, see Section 3.13 Default Case Algorithms
20		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf
21
22		namespace Unicode::Detail {
23
24		#if ENABLE_UNICODE_DATA
25
26		static bool is_after_uppercase_i(Utf8View const& string, size_t index)
27	0	{
28		// There is an uppercase I before C, and there is no intervening combining character class 230 (Above) or 0.
29	0	auto preceding_view = string.substring_view(0, index);
30	0	bool found_uppercase_i = false;
31
32		// FIXME: Would be better if Utf8View supported reverse iteration.
33	0	for (auto code_point : preceding_view) {
34	0	if (code_point == 'I') {
35	0	found_uppercase_i = true;
36	0	continue;
37	0	}
38
39	0	auto combining_class = canonical_combining_class(code_point);
40	0	if (combining_class == 0 \|\| combining_class == 230)
41	0	found_uppercase_i = false;
42	0	}
43
44	0	return found_uppercase_i;
45	0	}
46
47		static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index)
48	0	{
49		// There is a Soft_Dotted character before C, with no intervening character of combining class 0 or 230 (Above).
50	0	auto preceding_view = string.substring_view(0, index);
51	0	bool found_soft_dotted_code_point = false;
52
53		// FIXME: Would be better if Utf8View supported reverse iteration.
54	0	for (auto code_point : preceding_view) {
55	0	if (code_point_has_property(code_point, Property::Soft_Dotted)) {
56	0	found_soft_dotted_code_point = true;
57	0	continue;
58	0	}
59
60	0	auto combining_class = canonical_combining_class(code_point);
61	0	if (combining_class == 0 \|\| combining_class == 230)
62	0	found_soft_dotted_code_point = false;
63	0	}
64
65	0	return found_soft_dotted_code_point;
66	0	}
67
68		static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
69	84	{
70		// C is preceded by a sequence consisting of a cased letter and then zero or more case-ignorable
71		// characters, and C is not followed by a sequence consisting of zero or more case-ignorable
72		// characters and then a cased letter.
73	84	auto preceding_view = string.substring_view(0, index);
74	84	auto following_view = ((index + byte_length) < string.byte_length())
75	84	? string.substring_view(index + byte_length)
76	84	: Utf8View {};
77
78	84	size_t cased_letter_count = 0;
79
80	84	for (auto code_point : preceding_view) {
81	84	bool is_cased = code_point_has_property(code_point, Property::Cased);
82	84	bool is_case_ignorable = code_point_has_property(code_point, Property::Case_Ignorable);
83
84	84	if (is_cased && !is_case_ignorable)
85	42	++cased_letter_count;
86	42	else if (!is_case_ignorable)
87	42	cased_letter_count = 0;
88	84	}
89
90	84	if (cased_letter_count == 0)
91	84	return false;
92
93	0	for (auto code_point : following_view) {
94	0	bool is_cased = code_point_has_property(code_point, Property::Cased);
95	0	bool is_case_ignorable = code_point_has_property(code_point, Property::Case_Ignorable);
96
97	0	if (is_case_ignorable)
98	0	continue;
99	0	if (is_cased)
100	0	return false;
101
102	0	break;
103	0	}
104
105	0	return true;
106	0	}
107
108		static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
109	0	{
110		// C is followed by a character of combining class 230 (Above) with no intervening character of combining class 0 or 230 (Above).
111	0	auto following_view = ((index + byte_length) < string.byte_length())
112	0	? string.substring_view(index + byte_length)
113	0	: Utf8View {};
114
115	0	for (auto code_point : following_view) {
116	0	u32 combining_class = canonical_combining_class(code_point);
117
118	0	if (combining_class == 0)
119	0	return false;
120	0	if (combining_class == 230)
121	0	return true;
122	0	}
123
124	0	return false;
125	0	}
126
127		static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t index, size_t byte_length)
128	0	{
129		// C is followed by combining dot above (U+0307). Any sequence of characters with a combining class that is neither 0 nor 230 may
130		// intervene between the current character and the combining dot above.
131	0	auto following_view = ((index + byte_length) < string.byte_length())
132	0	? string.substring_view(index + byte_length)
133	0	: Utf8View {};
134
135	0	for (auto code_point : following_view) {
136	0	if (code_point == 0x307)
137	0	return true;
138
139	0	u32 combining_class = canonical_combining_class(code_point);
140
141	0	if (combining_class == 0)
142	0	return false;
143	0	if (combining_class == 230)
144	0	return false;
145	0	}
146
147	0	return false;
148	0	}
149
150		static Optional<SpecialCasing const&> find_matching_special_case(u32 code_point, Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length)
151	23.3k	{
152	23.3k	auto requested_locale = Locale::None;
153
154	23.3k	if (locale.has_value()) {
155	0	if (auto maybe_locale = locale_from_string(*locale); maybe_locale.has_value())
156	0	requested_locale = *maybe_locale;
157	0	}
158
159	23.3k	auto special_casings = special_case_mapping(code_point);
160
161	23.3k	for (auto const& special_casing : special_casings) {
162	19.6k	if (special_casing.locale != Locale::None && special_casing.locale != requested_locale)
163	19.0k	continue;
164
165	536	switch (special_casing.condition) {
166	452	case Condition::None:
167	452	return special_casing;
168
169	0	case Condition::AfterI:
170	0	if (is_after_uppercase_i(string, index))
171	0	return special_casing;
172	0	break;
173
174	0	case Condition::AfterSoftDotted:
175	0	if (is_after_soft_dotted_code_point(string, index))
176	0	return special_casing;
177	0	break;
178
179	84	case Condition::FinalSigma:
180	84	if (is_final_code_point(string, index, byte_length))
181	0	return special_casing;
182	84	break;
183
184	84	case Condition::MoreAbove:
185	0	if (is_followed_by_combining_class_above(string, index, byte_length))
186	0	return special_casing;
187	0	break;
188
189	0	case Condition::NotBeforeDot:
190	0	if (!is_followed_by_combining_dot_above(string, index, byte_length))
191	0	return special_casing;
192	0	break;
193	536	}
194	536	}
195
196	22.8k	return {};
197	23.3k	}
198
199		template<CaseFoldingStatus... StatusFilter>
200		static Optional<CaseFolding const&> find_matching_case_folding(u32 code_point)
201	0	{
202	0	auto case_foldings = case_folding_mapping(code_point);
203
204	0	for (auto const& case_folding : case_foldings) {
205	0	if (((case_folding.status == StatusFilter) \|\| ...))
206	0	return case_folding;
207	0	}
208
209	0	return {};
210	0	}
211
212		#endif
213
214		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
215		ErrorOr<void> build_lowercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
216	0	{
217	0	#if ENABLE_UNICODE_DATA
218	0	size_t index = 0;
219	0	size_t byte_length = 0;
220
221	0	for (auto it = code_points.begin(); it != code_points.end(); ++it, index += byte_length) {
222	0	u32 code_point = *it;
223	0	byte_length = it.underlying_code_point_length_in_bytes();
224
225	0	auto special_casing = find_matching_special_case(code_point, code_points, locale, index, byte_length);
226	0	if (!special_casing.has_value()) {
227	0	TRY(builder.try_append_code_point(to_unicode_lowercase(code_point)));
228	0	continue;
229	0	}
230
231	0	for (size_t i = 0; i < special_casing->lowercase_mapping_size; ++i)
232	0	TRY(builder.try_append_code_point(special_casing->lowercase_mapping[i]));
233	0	}
234
235	0	return {};
236		#else
237		return Error::from_string_literal("Unicode data has been disabled");
238		#endif
239	0	}
240
241		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
242		ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
243	2.60k	{
244	2.60k	#if ENABLE_UNICODE_DATA
245	2.60k	size_t index = 0;
246	2.60k	size_t byte_length = 0;
247
248	25.9k	for (auto it = code_points.begin(); it != code_points.end(); ++it, index += byte_length) {
249	23.3k	u32 code_point = *it;
250	23.3k	byte_length = it.underlying_code_point_length_in_bytes();
251
252	23.3k	auto special_casing = find_matching_special_case(code_point, code_points, locale, index, byte_length);
253	23.3k	if (!special_casing.has_value()) {
254	22.8k	TRY(builder.try_append_code_point(to_unicode_uppercase(code_point)));
255	0	continue;
256	22.8k	}
257
258	1.35k	for (size_t i = 0; i < special_casing->uppercase_mapping_size; ++i)
259	904	TRY(builder.try_append_code_point(special_casing->uppercase_mapping[i]));
260	452	}
261
262	2.60k	return {};
263		#else
264		return Error::from_string_literal("Unicode data has been disabled");
265		#endif
266	2.60k	}
267
268		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
269		ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale, [[maybe_unused]] TrailingCodePointTransformation trailing_code_point_transformation)
270	0	{
271	0	#if ENABLE_UNICODE_DATA
272		// toTitlecase(X): Find the word boundaries in X according to Unicode Standard Annex #29,
273		// “Unicode Text Segmentation.” For each word boundary, find the first cased character F following
274		// the word boundary. If F exists, map F to Titlecase_Mapping(F); then map all characters C between
275		// F and the following word boundary to Lowercase_Mapping(C).
276
277	0	auto first_cased_code_point_after_boundary = [&](auto boundary, auto next_boundary) -> Optional<Utf8CodePointIterator> {
278	0	auto it = code_points.iterator_at_byte_offset_without_validation(boundary);
279	0	auto end = code_points.iterator_at_byte_offset_without_validation(next_boundary);
280
281	0	for (; it != end; ++it) {
282	0	if (code_point_has_property(*it, Property::Cased))
283	0	return it;
284	0	}
285
286	0	return {};
287	0	};
288
289	0	auto append_code_point_as_titlecase = [&](auto code_point, auto code_point_offset, auto code_point_length) -> ErrorOr<void> {
290	0	auto special_casing = find_matching_special_case(code_point, code_points, locale, code_point_offset, code_point_length);
291	0	if (!special_casing.has_value()) {
292	0	TRY(builder.try_append_code_point(to_unicode_titlecase(code_point)));
293	0	return {};
294	0	}
295
296	0	for (size_t i = 0; i < special_casing->titlecase_mapping_size; ++i)
297	0	TRY(builder.try_append_code_point(special_casing->titlecase_mapping[i]));
298	0	return {};
299	0	};
300
301	0	size_t boundary = 0;
302
303	0	while (true) {
304	0	auto next_boundary = next_word_segmentation_boundary(code_points, boundary);
305	0	if (!next_boundary.has_value())
306	0	break;
307
308	0	if (auto it = first_cased_code_point_after_boundary(boundary, *next_boundary); it.has_value()) {
309	0	auto code_point = *it.value();
310	0	auto code_point_offset = code_points.byte_offset_of(*it);
311	0	auto code_point_length = it->underlying_code_point_length_in_bytes();
312
313	0	auto caseless_code_points = code_points.substring_view(boundary, code_point_offset - boundary);
314	0	TRY(builder.try_append(caseless_code_points.as_string()));
315
316	0	TRY(append_code_point_as_titlecase(code_point, code_point_offset, code_point_length));
317	0	boundary = code_point_offset + code_point_length;
318	0	}
319
320	0	auto remaining_code_points = code_points.substring_view(boundary, *next_boundary - boundary);
321	0	switch (trailing_code_point_transformation) {
322	0	case TrailingCodePointTransformation::Lowercase:
323	0	TRY(build_lowercase_string(remaining_code_points, builder, locale));
324	0	break;
325	0	case TrailingCodePointTransformation::PreserveExisting:
326	0	TRY(builder.try_append(remaining_code_points.as_string()));
327	0	break;
328	0	}
329
330	0	boundary = *next_boundary;
331	0	}
332
333	0	return {};
334		#else
335		return Error::from_string_literal("Unicode data has been disabled");
336		#endif
337	0	}
338
339		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
340		ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder)
341	0	{
342		// toCasefold(X): Map each character C in X to Case_Folding(C).
343	0	for (auto code_point : code_points) {
344	0	auto case_folding = casefold_code_point(code_point);
345	0	TRY(builder.try_append(case_folding));
346	0	}
347
348	0	return {};
349	0	}
350
351		// https://www.unicode.org/reports/tr44/#CaseFolding.txt
352		// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
353		Utf32View casefold_code_point(u32 const& code_point)
354	0	{
355	0	#if ENABLE_UNICODE_DATA
356		// Case_Folding(C) uses the mappings with the status field value “C” or “F” in the data file
357		// CaseFolding.txt in the Unicode Character Database.
358	0	using enum CaseFoldingStatus;
359
360	0	if (auto case_folding = find_matching_case_folding<Common, Full>(code_point); case_folding.has_value())
361	0	return Utf32View { case_folding->mapping, case_folding->mapping_size };
362	0	#endif
363
364		// The case foldings are omitted in the data file if they are the same as the code point itself.
365	0	return Utf32View { &code_point, 1 };
366	0	}
367
368		}