/src/serenity/Userland/Libraries/LibLocale/Segmenter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/Utf16View.h> |
8 | | #include <AK/Utf32View.h> |
9 | | #include <LibLocale/Locale.h> |
10 | | #include <LibLocale/Segmenter.h> |
11 | | #include <LibUnicode/Segmentation.h> |
12 | | |
13 | | namespace Locale { |
14 | | |
15 | | SegmenterGranularity segmenter_granularity_from_string(StringView segmenter_granularity) |
16 | 0 | { |
17 | 0 | if (segmenter_granularity == "grapheme"sv) |
18 | 0 | return SegmenterGranularity::Grapheme; |
19 | 0 | if (segmenter_granularity == "sentence"sv) |
20 | 0 | return SegmenterGranularity::Sentence; |
21 | 0 | if (segmenter_granularity == "word"sv) |
22 | 0 | return SegmenterGranularity::Word; |
23 | 0 | VERIFY_NOT_REACHED(); |
24 | 0 | } |
25 | | |
26 | | StringView segmenter_granularity_to_string(SegmenterGranularity segmenter_granularity) |
27 | 0 | { |
28 | 0 | switch (segmenter_granularity) { |
29 | 0 | case SegmenterGranularity::Grapheme: |
30 | 0 | return "grapheme"sv; |
31 | 0 | case SegmenterGranularity::Sentence: |
32 | 0 | return "sentence"sv; |
33 | 0 | case SegmenterGranularity::Word: |
34 | 0 | return "word"sv; |
35 | 0 | } |
36 | 0 | VERIFY_NOT_REACHED(); |
37 | 0 | } |
38 | | |
39 | | class SegmenterImpl : public Segmenter { |
40 | | public: |
41 | | SegmenterImpl(SegmenterGranularity segmenter_granularity) |
42 | 0 | : Segmenter(segmenter_granularity) |
43 | 0 | { |
44 | 0 | } |
45 | | |
46 | 0 | virtual ~SegmenterImpl() override = default; |
47 | | |
48 | | virtual NonnullOwnPtr<Segmenter> clone() const override |
49 | 0 | { |
50 | 0 | return make<SegmenterImpl>(m_segmenter_granularity); |
51 | 0 | } |
52 | | |
53 | | virtual void set_segmented_text(String text) override |
54 | 0 | { |
55 | 0 | m_string_storage = move(text); |
56 | 0 | set_text(m_string_storage.code_points()); |
57 | 0 | } |
58 | | |
59 | | virtual void set_segmented_text(Utf16View const& text) override |
60 | 0 | { |
61 | 0 | set_text(text); |
62 | 0 | } |
63 | | |
64 | | void set_segmented_text(Utf32View const& text) |
65 | 0 | { |
66 | 0 | set_text(text); |
67 | 0 | } |
68 | | |
69 | | virtual size_t current_boundary() override |
70 | 0 | { |
71 | 0 | return m_current_boundary; |
72 | 0 | } |
73 | | |
74 | | virtual Optional<size_t> previous_boundary(size_t boundary, Inclusive inclusive) override |
75 | 0 | { |
76 | 0 | recompute_boundaries_if_necessary(); |
77 | |
|
78 | 0 | if (inclusive == Inclusive::Yes) |
79 | 0 | ++boundary; |
80 | | |
81 | | // FIXME: Add AK::lower_bound, use |
82 | 0 | Optional<size_t> new_boundary; |
83 | 0 | for (auto segment_boundary : m_boundaries) { |
84 | 0 | if (segment_boundary < boundary) { |
85 | 0 | new_boundary = segment_boundary; |
86 | 0 | continue; |
87 | 0 | } |
88 | 0 | break; |
89 | 0 | } |
90 | |
|
91 | 0 | if (new_boundary.has_value()) |
92 | 0 | m_current_boundary = new_boundary.value(); |
93 | 0 | return new_boundary; |
94 | 0 | } |
95 | | |
96 | | virtual Optional<size_t> next_boundary(size_t boundary, Inclusive inclusive) override |
97 | 0 | { |
98 | 0 | recompute_boundaries_if_necessary(); |
99 | |
|
100 | 0 | if (inclusive == Inclusive::Yes) |
101 | 0 | --boundary; |
102 | | |
103 | | // FIXME: Add AK::upper_bound, use |
104 | 0 | Optional<size_t> new_boundary; |
105 | 0 | for (auto segment_boundary : m_boundaries) { |
106 | 0 | if (segment_boundary > boundary) { |
107 | 0 | new_boundary = segment_boundary; |
108 | 0 | break; |
109 | 0 | } |
110 | 0 | } |
111 | |
|
112 | 0 | if (new_boundary.has_value()) |
113 | 0 | m_current_boundary = new_boundary.value(); |
114 | 0 | return new_boundary; |
115 | 0 | } |
116 | | |
117 | | virtual void for_each_boundary(String text, SegmentationCallback callback) override |
118 | 0 | { |
119 | 0 | for_each_segmentation_boundary(text.code_points(), move(callback)); |
120 | 0 | } |
121 | | |
122 | | virtual void for_each_boundary(Utf16View const& text, SegmentationCallback callback) override |
123 | 0 | { |
124 | 0 | for_each_segmentation_boundary(text, move(callback)); |
125 | 0 | } |
126 | | |
127 | | virtual void for_each_boundary(Utf32View const& text, SegmentationCallback callback) override |
128 | 0 | { |
129 | 0 | for_each_segmentation_boundary(text, move(callback)); |
130 | 0 | } |
131 | | |
132 | | virtual bool is_current_boundary_word_like() const override |
133 | 0 | { |
134 | | // FIXME: Implement one day. |
135 | 0 | return false; |
136 | 0 | } |
137 | | |
138 | | private: |
139 | | void set_text(Variant<Utf8View, Utf16View, Utf32View> text) |
140 | 0 | { |
141 | 0 | m_segmented_text = text; |
142 | 0 | m_must_recompute_boundaries = true; |
143 | 0 | } |
144 | | |
145 | | void recompute_boundaries_if_necessary() |
146 | 0 | { |
147 | 0 | if (!m_must_recompute_boundaries) |
148 | 0 | return; |
149 | | |
150 | 0 | m_boundaries.clear(); |
151 | 0 | auto callback = [&](size_t boundary) { |
152 | 0 | m_boundaries.append(boundary); |
153 | 0 | return IterationDecision::Continue; |
154 | 0 | }; |
155 | 0 | m_segmented_text.visit([&](auto const& text) { return for_each_segmentation_boundary(text, move(callback)); }); Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf8View>(AK::Utf8View const&) const Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf16View>(AK::Utf16View const&) const Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf32View>(AK::Utf32View const&) const |
156 | 0 | m_must_recompute_boundaries = false; |
157 | 0 | } |
158 | | |
159 | | template<class T> |
160 | | void for_each_segmentation_boundary(T const& text, SegmentationCallback callback) |
161 | 0 | { |
162 | 0 | switch (segmenter_granularity()) { |
163 | 0 | case SegmenterGranularity::Grapheme: |
164 | 0 | Unicode::for_each_grapheme_segmentation_boundary(text, move(callback)); |
165 | 0 | break; |
166 | 0 | case SegmenterGranularity::Sentence: |
167 | 0 | Unicode::for_each_sentence_segmentation_boundary(text, move(callback)); |
168 | 0 | break; |
169 | 0 | case SegmenterGranularity::Word: |
170 | 0 | Unicode::for_each_word_segmentation_boundary(text, move(callback)); |
171 | 0 | break; |
172 | 0 | } |
173 | 0 | } Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf8View>(AK::Utf8View const&, AK::Function<AK::IterationDecision (unsigned long)>) Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf16View>(AK::Utf16View const&, AK::Function<AK::IterationDecision (unsigned long)>) Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf32View>(AK::Utf32View const&, AK::Function<AK::IterationDecision (unsigned long)>) |
174 | | |
175 | | bool m_must_recompute_boundaries { true }; |
176 | | Vector<size_t> m_boundaries; |
177 | | size_t m_current_boundary { 0 }; |
178 | | String m_string_storage; |
179 | | Variant<Utf8View, Utf16View, Utf32View> m_segmented_text { Utf8View {} }; |
180 | | }; |
181 | | |
182 | | NonnullOwnPtr<Segmenter> Segmenter::create(SegmenterGranularity segmenter_granularity) |
183 | 0 | { |
184 | 0 | return Segmenter::create(default_locale(), segmenter_granularity); |
185 | 0 | } |
186 | | |
187 | | NonnullOwnPtr<Segmenter> Segmenter::create(StringView locale, SegmenterGranularity segmenter_granularity) |
188 | 0 | { |
189 | | // FIXME: Implement locale-specific segmentation. |
190 | 0 | (void)locale; |
191 | 0 | return make<SegmenterImpl>(segmenter_granularity); |
192 | 0 | } |
193 | | |
194 | | } |