Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-segment-iterator.h"
10 :
11 : #include <map>
12 : #include <memory>
13 : #include <string>
14 :
15 : #include "src/heap/factory.h"
16 : #include "src/isolate.h"
17 : #include "src/objects-inl.h"
18 : #include "src/objects/intl-objects.h"
19 : #include "src/objects/js-segment-iterator-inl.h"
20 : #include "src/objects/managed.h"
21 : #include "unicode/brkiter.h"
22 :
23 : namespace v8 {
24 : namespace internal {
25 :
26 20214 : MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
27 : int32_t start,
28 : int32_t end) const {
29 40428 : return Intl::ToString(isolate, *(unicode_string()->raw()), start, end);
30 : }
31 :
32 0 : Handle<String> JSSegmentIterator::GranularityAsString() const {
33 0 : switch (granularity()) {
34 : case JSSegmenter::Granularity::GRAPHEME:
35 0 : return GetReadOnlyRoots().grapheme_string_handle();
36 : case JSSegmenter::Granularity::WORD:
37 0 : return GetReadOnlyRoots().word_string_handle();
38 : case JSSegmenter::Granularity::SENTENCE:
39 0 : return GetReadOnlyRoots().sentence_string_handle();
40 : case JSSegmenter::Granularity::LINE:
41 0 : return GetReadOnlyRoots().line_string_handle();
42 : case JSSegmenter::Granularity::COUNT:
43 0 : UNREACHABLE();
44 : }
45 0 : }
46 :
47 4320 : MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
48 : Isolate* isolate, icu::BreakIterator* break_iterator,
49 : JSSegmenter::Granularity granularity, Handle<String> text) {
50 4320 : CHECK_NOT_NULL(break_iterator);
51 : // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
52 : Handle<Map> map = Handle<Map>(
53 12960 : isolate->native_context()->intl_segment_iterator_map(), isolate);
54 4320 : Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
55 :
56 : Handle<JSSegmentIterator> segment_iterator =
57 4320 : Handle<JSSegmentIterator>::cast(result);
58 :
59 : segment_iterator->set_flags(0);
60 4320 : segment_iterator->set_granularity(granularity);
61 : // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
62 : Handle<Managed<icu::BreakIterator>> managed_break_iterator =
63 4320 : Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
64 4320 : segment_iterator->set_icu_break_iterator(*managed_break_iterator);
65 :
66 : // 3. Let iterator.[[SegmentIteratorString]] be string.
67 : Managed<icu::UnicodeString> unicode_string =
68 4320 : Intl::SetTextToBreakIterator(isolate, text, break_iterator);
69 4320 : segment_iterator->set_unicode_string(unicode_string);
70 :
71 : // 4. Let iterator.[[SegmentIteratorIndex]] be 0.
72 : // step 4 is stored inside break_iterator.
73 :
74 : // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
75 4320 : segment_iterator->set_is_break_type_set(false);
76 :
77 4320 : return segment_iterator;
78 : }
79 :
80 : // ecma402 #sec-segment-iterator-prototype-breakType
81 68022 : Handle<Object> JSSegmentIterator::BreakType() const {
82 68022 : if (!is_break_type_set()) {
83 1242 : return GetReadOnlyRoots().undefined_value_handle();
84 : }
85 134802 : icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
86 67401 : int32_t rule_status = break_iterator->getRuleStatus();
87 67401 : switch (granularity()) {
88 : case JSSegmenter::Granularity::GRAPHEME:
89 57996 : return GetReadOnlyRoots().undefined_value_handle();
90 : case JSSegmenter::Granularity::WORD:
91 22041 : if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
92 : // "words" that do not fit into any of other categories. Includes spaces
93 : // and most punctuation.
94 18774 : return GetReadOnlyRoots().none_string_handle();
95 : }
96 12654 : if ((rule_status >= UBRK_WORD_NUMBER &&
97 : rule_status < UBRK_WORD_NUMBER_LIMIT) ||
98 : (rule_status >= UBRK_WORD_LETTER &&
99 : rule_status < UBRK_WORD_LETTER_LIMIT) ||
100 : (rule_status >= UBRK_WORD_KANA &&
101 12654 : rule_status < UBRK_WORD_KANA_LIMIT) ||
102 : (rule_status >= UBRK_WORD_IDEO &&
103 : rule_status < UBRK_WORD_IDEO_LIMIT)) {
104 : // words that appear to be numbers, letters, kana characters,
105 : // ideographic characters, etc
106 25308 : return GetReadOnlyRoots().word_string_handle();
107 : }
108 0 : return GetReadOnlyRoots().undefined_value_handle();
109 : case JSSegmenter::Granularity::LINE:
110 14823 : if (rule_status >= UBRK_LINE_SOFT && rule_status < UBRK_LINE_SOFT_LIMIT) {
111 : // soft line breaks, index at which a line break is acceptable but
112 : // not required
113 29646 : return GetReadOnlyRoots().soft_string_handle();
114 : }
115 0 : if ((rule_status >= UBRK_LINE_HARD &&
116 : rule_status < UBRK_LINE_HARD_LIMIT)) {
117 : // hard, or mandatory line breaks
118 0 : return GetReadOnlyRoots().hard_string_handle();
119 : }
120 0 : return GetReadOnlyRoots().undefined_value_handle();
121 : case JSSegmenter::Granularity::SENTENCE:
122 1539 : if (rule_status >= UBRK_SENTENCE_TERM &&
123 : rule_status < UBRK_SENTENCE_TERM_LIMIT) {
124 : // sentences ending with a sentence terminator ('.', '?', '!', etc.)
125 : // character, possibly followed by a hard separator (CR, LF, PS, etc.)
126 1818 : return GetReadOnlyRoots().term_string_handle();
127 : }
128 630 : if ((rule_status >= UBRK_SENTENCE_SEP &&
129 : rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
130 : // sentences that do not contain an ending sentence terminator ('.',
131 : // '?', '!', etc.) character, but are ended only by a hard separator
132 : // (CR, LF, PS, etc.) hard, or mandatory line breaks
133 1260 : return GetReadOnlyRoots().sep_string_handle();
134 : }
135 0 : return GetReadOnlyRoots().undefined_value_handle();
136 : case JSSegmenter::Granularity::COUNT:
137 0 : UNREACHABLE();
138 : }
139 0 : }
140 :
141 : // ecma402 #sec-segment-iterator-prototype-index
142 120717 : Handle<Object> JSSegmentIterator::Index(
143 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
144 : icu::BreakIterator* icu_break_iterator =
145 241434 : segment_iterator->icu_break_iterator()->raw();
146 120717 : CHECK_NOT_NULL(icu_break_iterator);
147 120717 : return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
148 : }
149 :
150 : // ecma402 #sec-segment-iterator-prototype-next
151 21366 : MaybeHandle<JSReceiver> JSSegmentIterator::Next(
152 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
153 : Factory* factory = isolate->factory();
154 : icu::BreakIterator* icu_break_iterator =
155 42732 : segment_iterator->icu_break_iterator()->raw();
156 : // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
157 21366 : int32_t prev = icu_break_iterator->current();
158 : // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
159 21366 : int32_t index = icu_break_iterator->next();
160 21366 : segment_iterator->set_is_break_type_set(true);
161 21366 : if (index == icu::BreakIterator::DONE) {
162 : // 5. If done is true, return CreateIterResultObject(undefined, true).
163 : return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
164 1152 : true);
165 : }
166 : // 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
167 20214 : Handle<Object> new_index = factory->NewNumberFromInt(index);
168 :
169 : // 8. Let segment be the substring of string from previousIndex to
170 : // newIndex, inclusive of previousIndex and exclusive of newIndex.
171 : Handle<String> segment;
172 40428 : ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
173 : segment_iterator->GetSegment(isolate, prev, index),
174 : JSReceiver);
175 :
176 : // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
177 20214 : Handle<Object> break_type = segment_iterator->BreakType();
178 :
179 : // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
180 20214 : Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
181 :
182 : // 11. Perform ! CreateDataProperty(result "segment", segment).
183 40428 : CHECK(JSReceiver::CreateDataProperty(
184 : isolate, result, factory->segment_string(), segment, kDontThrow)
185 : .FromJust());
186 :
187 : // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
188 40428 : CHECK(JSReceiver::CreateDataProperty(isolate, result,
189 : factory->breakType_string(), break_type,
190 : kDontThrow)
191 : .FromJust());
192 :
193 : // 13. Perform ! CreateDataProperty(result, "index", newIndex).
194 40428 : CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
195 : new_index, kDontThrow)
196 : .FromJust());
197 :
198 : // 14. Return CreateIterResultObject(result, false).
199 20214 : return factory->NewJSIteratorResult(result, false);
200 : }
201 :
202 : // ecma402 #sec-segment-iterator-prototype-following
203 27108 : Maybe<bool> JSSegmentIterator::Following(
204 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
205 : Handle<Object> from_obj) {
206 : Factory* factory = isolate->factory();
207 : icu::BreakIterator* icu_break_iterator =
208 54216 : segment_iterator->icu_break_iterator()->raw();
209 : // 3. If from is not undefined,
210 54216 : if (!from_obj->IsUndefined()) {
211 : // a. Let from be ? ToIndex(from).
212 : uint32_t from;
213 : Handle<Object> index;
214 126 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
215 : isolate, index,
216 : Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
217 : Nothing<bool>());
218 108 : if (!index->ToArrayIndex(&from)) {
219 0 : THROW_NEW_ERROR_RETURN_VALUE(
220 : isolate,
221 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
222 : factory->NewStringFromStaticChars("from"),
223 : factory->NewStringFromStaticChars("following"), index),
224 : Nothing<bool>());
225 : }
226 : // b. Let length be the length of iterator.[[SegmentIteratorString]].
227 : uint32_t length =
228 54 : static_cast<uint32_t>(icu_break_iterator->getText().getLength());
229 :
230 : // c. If from ≥ length, throw a RangeError exception.
231 54 : if (from >= length) {
232 72 : THROW_NEW_ERROR_RETURN_VALUE(
233 : isolate,
234 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
235 : factory->NewStringFromStaticChars("from"),
236 : factory->NewStringFromStaticChars("following"),
237 : from_obj),
238 : Nothing<bool>());
239 : }
240 :
241 : // d. Let iterator.[[SegmentIteratorPosition]] be from.
242 36 : segment_iterator->set_is_break_type_set(true);
243 36 : icu_break_iterator->following(from);
244 : return Just(false);
245 : }
246 : // 4. return AdvanceSegmentIterator(iterator, forward).
247 : // 4. .... or if direction is backwards and position is 0, return true.
248 : // 4. If direction is forwards and position is the length of string ... return
249 : // true.
250 27045 : segment_iterator->set_is_break_type_set(true);
251 27045 : return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
252 : }
253 :
254 : // ecma402 #sec-segment-iterator-prototype-preceding
255 10710 : Maybe<bool> JSSegmentIterator::Preceding(
256 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
257 : Handle<Object> from_obj) {
258 : Factory* factory = isolate->factory();
259 : icu::BreakIterator* icu_break_iterator =
260 21420 : segment_iterator->icu_break_iterator()->raw();
261 : // 3. If from is not undefined,
262 21420 : if (!from_obj->IsUndefined()) {
263 : // a. Let from be ? ToIndex(from).
264 : uint32_t from;
265 : Handle<Object> index;
266 1296 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
267 : isolate, index,
268 : Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
269 : Nothing<bool>());
270 :
271 1278 : if (!index->ToArrayIndex(&from)) {
272 0 : THROW_NEW_ERROR_RETURN_VALUE(
273 : isolate,
274 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
275 : factory->NewStringFromStaticChars("from"),
276 : factory->NewStringFromStaticChars("preceding"), index),
277 : Nothing<bool>());
278 : }
279 : // b. Let length be the length of iterator.[[SegmentIteratorString]].
280 : uint32_t length =
281 639 : static_cast<uint32_t>(icu_break_iterator->getText().getLength());
282 : // c. If from > length or from = 0, throw a RangeError exception.
283 639 : if (from > length || from == 0) {
284 144 : THROW_NEW_ERROR_RETURN_VALUE(
285 : isolate,
286 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
287 : factory->NewStringFromStaticChars("from"),
288 : factory->NewStringFromStaticChars("preceding"),
289 : from_obj),
290 : Nothing<bool>());
291 : }
292 : // d. Let iterator.[[SegmentIteratorIndex]] be from.
293 603 : segment_iterator->set_is_break_type_set(true);
294 603 : icu_break_iterator->preceding(from);
295 : return Just(false);
296 : }
297 : // 4. return AdvanceSegmentIterator(iterator, backwards).
298 : // 4. .... or if direction is backwards and position is 0, return true.
299 10062 : segment_iterator->set_is_break_type_set(true);
300 10062 : return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
301 : }
302 :
303 : } // namespace internal
304 183867 : } // namespace v8
|