Coverage Report

Created: 2025-03-04 07:22

/src/serenity/Userland/Libraries/LibLocale/Segmenter.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/Utf16View.h>
8
#include <AK/Utf32View.h>
9
#include <LibLocale/Locale.h>
10
#include <LibLocale/Segmenter.h>
11
#include <LibUnicode/Segmentation.h>
12
13
namespace Locale {
14
15
// Converts a granularity name ("grapheme", "sentence" or "word") to the matching
// SegmenterGranularity value. Any other input hits VERIFY_NOT_REACHED().
SegmenterGranularity segmenter_granularity_from_string(StringView segmenter_granularity)
{
    struct NamedGranularity {
        StringView name;
        SegmenterGranularity granularity;
    };

    static constexpr NamedGranularity known_granularities[] = {
        { "grapheme"sv, SegmenterGranularity::Grapheme },
        { "sentence"sv, SegmenterGranularity::Sentence },
        { "word"sv, SegmenterGranularity::Word },
    };

    for (auto const& candidate : known_granularities) {
        if (segmenter_granularity == candidate.name)
            return candidate.granularity;
    }

    VERIFY_NOT_REACHED();
}
25
26
// Returns the lowercase name of the given granularity ("grapheme", "sentence" or "word").
// A value outside the three known enumerators hits VERIFY_NOT_REACHED().
StringView segmenter_granularity_to_string(SegmenterGranularity segmenter_granularity)
{
    if (segmenter_granularity == SegmenterGranularity::Grapheme)
        return "grapheme"sv;

    if (segmenter_granularity == SegmenterGranularity::Sentence)
        return "sentence"sv;

    if (segmenter_granularity == SegmenterGranularity::Word)
        return "word"sv;

    VERIFY_NOT_REACHED();
}
38
39
class SegmenterImpl : public Segmenter {
40
public:
41
    SegmenterImpl(SegmenterGranularity segmenter_granularity)
42
0
        : Segmenter(segmenter_granularity)
43
0
    {
44
0
    }
45
46
0
    virtual ~SegmenterImpl() override = default;
47
48
    virtual NonnullOwnPtr<Segmenter> clone() const override
49
0
    {
50
0
        return make<SegmenterImpl>(m_segmenter_granularity);
51
0
    }
52
53
    virtual void set_segmented_text(String text) override
54
0
    {
55
0
        m_string_storage = move(text);
56
0
        set_text(m_string_storage.code_points());
57
0
    }
58
59
    virtual void set_segmented_text(Utf16View const& text) override
60
0
    {
61
0
        set_text(text);
62
0
    }
63
64
    void set_segmented_text(Utf32View const& text)
65
0
    {
66
0
        set_text(text);
67
0
    }
68
69
    virtual size_t current_boundary() override
70
0
    {
71
0
        return m_current_boundary;
72
0
    }
73
74
    virtual Optional<size_t> previous_boundary(size_t boundary, Inclusive inclusive) override
75
0
    {
76
0
        recompute_boundaries_if_necessary();
77
78
0
        if (inclusive == Inclusive::Yes)
79
0
            ++boundary;
80
81
        // FIXME: Add AK::lower_bound, use
82
0
        Optional<size_t> new_boundary;
83
0
        for (auto segment_boundary : m_boundaries) {
84
0
            if (segment_boundary < boundary) {
85
0
                new_boundary = segment_boundary;
86
0
                continue;
87
0
            }
88
0
            break;
89
0
        }
90
91
0
        if (new_boundary.has_value())
92
0
            m_current_boundary = new_boundary.value();
93
0
        return new_boundary;
94
0
    }
95
96
    virtual Optional<size_t> next_boundary(size_t boundary, Inclusive inclusive) override
97
0
    {
98
0
        recompute_boundaries_if_necessary();
99
100
0
        if (inclusive == Inclusive::Yes)
101
0
            --boundary;
102
103
        // FIXME: Add AK::upper_bound, use
104
0
        Optional<size_t> new_boundary;
105
0
        for (auto segment_boundary : m_boundaries) {
106
0
            if (segment_boundary > boundary) {
107
0
                new_boundary = segment_boundary;
108
0
                break;
109
0
            }
110
0
        }
111
112
0
        if (new_boundary.has_value())
113
0
            m_current_boundary = new_boundary.value();
114
0
        return new_boundary;
115
0
    }
116
117
    virtual void for_each_boundary(String text, SegmentationCallback callback) override
118
0
    {
119
0
        for_each_segmentation_boundary(text.code_points(), move(callback));
120
0
    }
121
122
    virtual void for_each_boundary(Utf16View const& text, SegmentationCallback callback) override
123
0
    {
124
0
        for_each_segmentation_boundary(text, move(callback));
125
0
    }
126
127
    virtual void for_each_boundary(Utf32View const& text, SegmentationCallback callback) override
128
0
    {
129
0
        for_each_segmentation_boundary(text, move(callback));
130
0
    }
131
132
    virtual bool is_current_boundary_word_like() const override
133
0
    {
134
        // FIXME: Implement one day.
135
0
        return false;
136
0
    }
137
138
private:
139
    void set_text(Variant<Utf8View, Utf16View, Utf32View> text)
140
0
    {
141
0
        m_segmented_text = text;
142
0
        m_must_recompute_boundaries = true;
143
0
    }
144
145
    void recompute_boundaries_if_necessary()
146
0
    {
147
0
        if (!m_must_recompute_boundaries)
148
0
            return;
149
150
0
        m_boundaries.clear();
151
0
        auto callback = [&](size_t boundary) {
152
0
            m_boundaries.append(boundary);
153
0
            return IterationDecision::Continue;
154
0
        };
155
0
        m_segmented_text.visit([&](auto const& text) { return for_each_segmentation_boundary(text, move(callback)); });
Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf8View>(AK::Utf8View const&) const
Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf16View>(AK::Utf16View const&) const
Unexecuted instantiation: auto Locale::SegmenterImpl::recompute_boundaries_if_necessary()::{lambda(auto:1 const&)#1}::operator()<AK::Utf32View>(AK::Utf32View const&) const
156
0
        m_must_recompute_boundaries = false;
157
0
    }
158
159
    template<class T>
160
    void for_each_segmentation_boundary(T const& text, SegmentationCallback callback)
161
0
    {
162
0
        switch (segmenter_granularity()) {
163
0
        case SegmenterGranularity::Grapheme:
164
0
            Unicode::for_each_grapheme_segmentation_boundary(text, move(callback));
165
0
            break;
166
0
        case SegmenterGranularity::Sentence:
167
0
            Unicode::for_each_sentence_segmentation_boundary(text, move(callback));
168
0
            break;
169
0
        case SegmenterGranularity::Word:
170
0
            Unicode::for_each_word_segmentation_boundary(text, move(callback));
171
0
            break;
172
0
        }
173
0
    }
Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf8View>(AK::Utf8View const&, AK::Function<AK::IterationDecision (unsigned long)>)
Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf16View>(AK::Utf16View const&, AK::Function<AK::IterationDecision (unsigned long)>)
Unexecuted instantiation: void Locale::SegmenterImpl::for_each_segmentation_boundary<AK::Utf32View>(AK::Utf32View const&, AK::Function<AK::IterationDecision (unsigned long)>)
174
175
    bool m_must_recompute_boundaries { true };
176
    Vector<size_t> m_boundaries;
177
    size_t m_current_boundary { 0 };
178
    String m_string_storage;
179
    Variant<Utf8View, Utf16View, Utf32View> m_segmented_text { Utf8View {} };
180
};
181
182
// Convenience overload: builds a segmenter for the default locale.
NonnullOwnPtr<Segmenter> Segmenter::create(SegmenterGranularity segmenter_granularity)
{
    auto const locale = default_locale();
    return Segmenter::create(locale, segmenter_granularity);
}
186
187
// Builds a segmenter with the requested granularity. The locale is accepted but currently
// has no effect on the segmenter that is created.
NonnullOwnPtr<Segmenter> Segmenter::create([[maybe_unused]] StringView locale, SegmenterGranularity segmenter_granularity)
{
    // FIXME: Implement locale-specific segmentation.
    auto segmenter = make<SegmenterImpl>(segmenter_granularity);
    return segmenter;
}
193
194
}