Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-segmenter.h"
10 :
11 : #include <map>
12 : #include <memory>
13 : #include <string>
14 :
15 : #include "src/heap/factory.h"
16 : #include "src/isolate.h"
17 : #include "src/objects-inl.h"
18 : #include "src/objects/intl-objects.h"
19 : #include "src/objects/js-segmenter-inl.h"
20 : #include "src/objects/managed.h"
21 : #include "unicode/brkiter.h"
22 :
23 : namespace v8 {
24 : namespace internal {
25 :
26 0 : JSSegmenter::LineBreakStyle JSSegmenter::GetLineBreakStyle(const char* str) {
27 0 : if (strcmp(str, "strict") == 0) return JSSegmenter::LineBreakStyle::STRICT;
28 0 : if (strcmp(str, "normal") == 0) return JSSegmenter::LineBreakStyle::NORMAL;
29 0 : if (strcmp(str, "loose") == 0) return JSSegmenter::LineBreakStyle::LOOSE;
30 0 : UNREACHABLE();
31 : }
32 :
33 0 : JSSegmenter::Granularity JSSegmenter::GetGranularity(const char* str) {
34 0 : if (strcmp(str, "grapheme") == 0) return JSSegmenter::Granularity::GRAPHEME;
35 0 : if (strcmp(str, "word") == 0) return JSSegmenter::Granularity::WORD;
36 0 : if (strcmp(str, "sentence") == 0) return JSSegmenter::Granularity::SENTENCE;
37 0 : if (strcmp(str, "line") == 0) return JSSegmenter::Granularity::LINE;
38 0 : UNREACHABLE();
39 : }
40 :
41 1269 : MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
42 : Isolate* isolate, Handle<JSSegmenter> segmenter_holder,
43 : Handle<Object> locales, Handle<Object> input_options) {
44 : segmenter_holder->set_flags(0);
45 :
46 : // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
47 : Maybe<std::vector<std::string>> maybe_requested_locales =
48 1269 : Intl::CanonicalizeLocaleList(isolate, locales);
49 1269 : MAYBE_RETURN(maybe_requested_locales, Handle<JSSegmenter>());
50 : std::vector<std::string> requested_locales =
51 1260 : maybe_requested_locales.FromJust();
52 :
53 : // 11. If options is undefined, then
54 : Handle<JSReceiver> options;
55 2520 : if (input_options->IsUndefined(isolate)) {
56 : // 11. a. Let options be ObjectCreate(null).
57 270 : options = isolate->factory()->NewJSObjectWithNullProto();
58 : // 12. Else
59 : } else {
60 : // 23. a. Let options be ? ToObject(options).
61 1980 : ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
62 : Object::ToObject(isolate, input_options),
63 : JSSegmenter);
64 : }
65 :
66 : // 4. Let opt be a new Record.
67 : // 5. Let matcher be ? GetOption(options, "localeMatcher", "string",
68 : // « "lookup", "best fit" », "best fit").
69 : // 6. Set opt.[[localeMatcher]] to matcher.
70 : Maybe<Intl::MatcherOption> maybe_locale_matcher =
71 1260 : Intl::GetLocaleMatcher(isolate, options, "Intl.Segmenter");
72 1260 : MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
73 : Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
74 :
75 : // 8. Set opt.[[lb]] to lineBreakStyle.
76 :
77 : // 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
78 : // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
79 : Intl::ResolvedLocale r =
80 : Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
81 6255 : requested_locales, matcher, {"lb"});
82 :
83 : // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
84 : // "strict", "normal", "loose" », "normal").
85 : Maybe<LineBreakStyle> maybe_line_break_style =
86 : Intl::GetStringOption<LineBreakStyle>(
87 : isolate, options, "lineBreakStyle", "Intl.Segmenter",
88 : {"strict", "normal", "loose"},
89 : {LineBreakStyle::STRICT, LineBreakStyle::NORMAL,
90 : LineBreakStyle::LOOSE},
91 3753 : LineBreakStyle::NORMAL);
92 1251 : MAYBE_RETURN(maybe_line_break_style, MaybeHandle<JSSegmenter>());
93 : LineBreakStyle line_break_style_enum = maybe_line_break_style.FromJust();
94 :
95 : // 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
96 : Handle<String> locale_str =
97 1233 : isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
98 1233 : segmenter_holder->set_locale(*locale_str);
99 :
100 : // 13. Let granularity be ? GetOption(options, "granularity", "string", «
101 : // "grapheme", "word", "sentence", "line" », "grapheme").
102 : Maybe<Granularity> maybe_granularity = Intl::GetStringOption<Granularity>(
103 : isolate, options, "granularity", "Intl.Segmenter",
104 : {"grapheme", "word", "sentence", "line"},
105 : {Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE,
106 : Granularity::LINE},
107 3699 : Granularity::GRAPHEME);
108 1233 : MAYBE_RETURN(maybe_granularity, MaybeHandle<JSSegmenter>());
109 : Granularity granularity_enum = maybe_granularity.FromJust();
110 :
111 : // 14. Set segmenter.[[SegmenterGranularity]] to granularity.
112 1215 : segmenter_holder->set_granularity(granularity_enum);
113 :
114 : // 15. If granularity is "line",
115 1215 : if (granularity_enum == Granularity::LINE) {
116 : // a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
117 315 : segmenter_holder->set_line_break_style(line_break_style_enum);
118 : } else {
119 900 : segmenter_holder->set_line_break_style(LineBreakStyle::NOTSET);
120 : }
121 :
122 2430 : icu::Locale icu_locale = r.icu_locale;
123 : DCHECK(!icu_locale.isBogus());
124 :
125 1215 : UErrorCode status = U_ZERO_ERROR;
126 : std::unique_ptr<icu::BreakIterator> icu_break_iterator;
127 :
128 1215 : switch (granularity_enum) {
129 : case Granularity::GRAPHEME:
130 : icu_break_iterator.reset(
131 576 : icu::BreakIterator::createCharacterInstance(icu_locale, status));
132 : break;
133 : case Granularity::WORD:
134 : icu_break_iterator.reset(
135 171 : icu::BreakIterator::createWordInstance(icu_locale, status));
136 : break;
137 : case Granularity::SENTENCE:
138 : icu_break_iterator.reset(
139 153 : icu::BreakIterator::createSentenceInstance(icu_locale, status));
140 : break;
141 : case Granularity::LINE: {
142 : // 15. If granularity is "line",
143 : // a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
144 315 : const char* key = uloc_toLegacyKey("lb");
145 315 : CHECK_NOT_NULL(key);
146 : const char* value =
147 315 : uloc_toLegacyType(key, segmenter_holder->LineBreakStyleAsCString());
148 315 : CHECK_NOT_NULL(value);
149 315 : UErrorCode status = U_ZERO_ERROR;
150 315 : icu_locale.setKeywordValue(key, value, status);
151 630 : CHECK(U_SUCCESS(status));
152 : icu_break_iterator.reset(
153 315 : icu::BreakIterator::createLineInstance(icu_locale, status));
154 : break;
155 : }
156 : case Granularity::COUNT:
157 0 : UNREACHABLE();
158 : }
159 :
160 2430 : CHECK(U_SUCCESS(status));
161 1215 : CHECK_NOT_NULL(icu_break_iterator.get());
162 :
163 : Handle<Managed<icu::BreakIterator>> managed_break_iterator =
164 : Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
165 2430 : std::move(icu_break_iterator));
166 :
167 1215 : segmenter_holder->set_icu_break_iterator(*managed_break_iterator);
168 1215 : return segmenter_holder;
169 : }
170 :
171 : // ecma402 #sec-Intl.Segmenter.prototype.resolvedOptions
172 540 : Handle<JSObject> JSSegmenter::ResolvedOptions(
173 : Isolate* isolate, Handle<JSSegmenter> segmenter_holder) {
174 : Factory* factory = isolate->factory();
175 : // 3. Let options be ! ObjectCreate(%ObjectPrototype%).
176 540 : Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
177 : // 4. For each row of Table 1, except the header row, do
178 : // a. Let p be the Property value of the current row.
179 : // b. Let v be the value of pr's internal slot whose name is the Internal Slot
180 : // value of the current row.
181 : //
182 : // c. If v is not undefined, then
183 : // i. Perform ! CreateDataPropertyOrThrow(options, p, v).
184 : // Table 1: Resolved Options of Segmenter Instances
185 : // Internal Slot Property
186 : // [[Locale]] "locale"
187 : // [[SegmenterGranularity]] "granularity"
188 : // [[SegmenterLineBreakStyle]] "lineBreakStyle"
189 :
190 1080 : Handle<String> locale(segmenter_holder->locale(), isolate);
191 : JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
192 540 : NONE);
193 : JSObject::AddProperty(isolate, result, factory->granularity_string(),
194 1080 : segmenter_holder->GranularityAsString(), NONE);
195 540 : if (segmenter_holder->line_break_style() != LineBreakStyle::NOTSET) {
196 : JSObject::AddProperty(isolate, result, factory->lineBreakStyle_string(),
197 180 : segmenter_holder->LineBreakStyleAsString(), NONE);
198 : }
199 : // 5. Return options.
200 540 : return result;
201 : }
202 :
203 315 : const char* JSSegmenter::LineBreakStyleAsCString() const {
204 315 : switch (line_break_style()) {
205 : case LineBreakStyle::STRICT:
206 : return "strict";
207 : case LineBreakStyle::NORMAL:
208 171 : return "normal";
209 : case LineBreakStyle::LOOSE:
210 72 : return "loose";
211 : case LineBreakStyle::COUNT:
212 : case LineBreakStyle::NOTSET:
213 0 : UNREACHABLE();
214 : }
215 0 : }
216 :
217 90 : Handle<String> JSSegmenter::LineBreakStyleAsString() const {
218 90 : switch (line_break_style()) {
219 : case LineBreakStyle::STRICT:
220 36 : return GetReadOnlyRoots().strict_string_handle();
221 : case LineBreakStyle::NORMAL:
222 108 : return GetReadOnlyRoots().normal_string_handle();
223 : case LineBreakStyle::LOOSE:
224 36 : return GetReadOnlyRoots().loose_string_handle();
225 : case LineBreakStyle::COUNT:
226 : case LineBreakStyle::NOTSET:
227 0 : UNREACHABLE();
228 : }
229 0 : }
230 :
231 540 : Handle<String> JSSegmenter::GranularityAsString() const {
232 540 : switch (granularity()) {
233 : case Granularity::GRAPHEME:
234 630 : return GetReadOnlyRoots().grapheme_string_handle();
235 : case Granularity::WORD:
236 144 : return GetReadOnlyRoots().word_string_handle();
237 : case Granularity::SENTENCE:
238 126 : return GetReadOnlyRoots().sentence_string_handle();
239 : case Granularity::LINE:
240 180 : return GetReadOnlyRoots().line_string_handle();
241 : case Granularity::COUNT:
242 0 : UNREACHABLE();
243 : }
244 0 : }
245 :
246 1292 : std::set<std::string> JSSegmenter::GetAvailableLocales() {
247 1292 : int32_t num_locales = 0;
248 : const icu::Locale* icu_available_locales =
249 1292 : icu::BreakIterator::getAvailableLocales(num_locales);
250 1292 : return Intl::BuildLocaleSet(icu_available_locales, num_locales);
251 : }
252 :
253 : } // namespace internal
254 183867 : } // namespace v8
|