Line data Source code
1 : // Copyright 2018 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_INTL_SUPPORT
6 : #error Internationalization is expected to be enabled.
7 : #endif // V8_INTL_SUPPORT
8 :
9 : #include "src/objects/js-segment-iterator.h"
10 :
11 : #include <map>
12 : #include <memory>
13 : #include <string>
14 :
15 : #include "src/heap/factory.h"
16 : #include "src/isolate.h"
17 : #include "src/objects-inl.h"
18 : #include "src/objects/intl-objects.h"
19 : #include "src/objects/js-segment-iterator-inl.h"
20 : #include "src/objects/managed.h"
21 : #include "unicode/brkiter.h"
22 :
23 : namespace v8 {
24 : namespace internal {
25 :
// Returns the result of Intl::ToString for the text held in unicode_string()
// and the offsets |start| and |end|.
// NOTE(review): presumably this is the substring [start, end) -- Next() calls
// it with (previous index, new index) -- but the exact range semantics live in
// Intl::ToString; confirm there.
26 16920 : MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
27 : int32_t start,
28 : int32_t end) const {
29 33840 : return Intl::ToString(isolate, *(unicode_string()->raw()), start, end);
30 : }
31 :
// Maps granularity() to the matching read-only root string handle:
// GRAPHEME -> grapheme_string, WORD -> word_string, SENTENCE ->
// sentence_string. COUNT is not a real granularity and hits UNREACHABLE().
// Every enumerator is handled inside the switch, so control is not expected
// to fall out of it.
32 0 : Handle<String> JSSegmentIterator::GranularityAsString() const {
33 0 : switch (granularity()) {
34 : case JSSegmenter::Granularity::GRAPHEME:
35 : return GetReadOnlyRoots().grapheme_string_handle();
36 : case JSSegmenter::Granularity::WORD:
37 : return GetReadOnlyRoots().word_string_handle();
38 : case JSSegmenter::Granularity::SENTENCE:
39 : return GetReadOnlyRoots().sentence_string_handle();
40 : case JSSegmenter::Granularity::COUNT:
41 0 : UNREACHABLE();
42 : }
43 0 : }
44 :
// Allocates a JSSegmentIterator from the native context's
// intl_segment_iterator_map and initializes it:
//   - flags cleared to 0, then |granularity| stored;
//   - |break_iterator| wrapped in a Managed<icu::BreakIterator> and stored;
//   - |text| attached to the break iterator, with the resulting
//     Managed<icu::UnicodeString> stored on the iterator;
//   - the "break type is set" flag cleared.
// CHECK-fails if |break_iterator| is null. No path visible in this body
// returns an empty MaybeHandle.
45 2772 : MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
46 : Isolate* isolate, icu::BreakIterator* break_iterator,
47 : JSSegmenter::Granularity granularity, Handle<String> text) {
48 2772 : CHECK_NOT_NULL(break_iterator);
49 : // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
50 : Handle<Map> map = Handle<Map>(
51 8316 : isolate->native_context()->intl_segment_iterator_map(), isolate);
52 2772 : Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
53 :
54 : Handle<JSSegmentIterator> segment_iterator =
55 : Handle<JSSegmentIterator>::cast(result);
56 :
// Clear the whole flags field first; granularity is written after it.
57 : segment_iterator->set_flags(0);
58 2772 : segment_iterator->set_granularity(granularity);
59 : // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
// NOTE(review): FromRawPtr presumably hands ownership of |break_iterator| to
// the Managed wrapper, and the literal 0 is presumably its size estimate --
// confirm against managed.h. The raw pointer is still used below.
60 : Handle<Managed<icu::BreakIterator>> managed_break_iterator =
61 2772 : Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
62 2772 : segment_iterator->set_icu_break_iterator(*managed_break_iterator);
63 :
64 : // 3. Let iterator.[[SegmentIteratorString]] be string.
// The stored Managed<icu::UnicodeString> is what GetSegment() later reads
// segments from.
65 : Managed<icu::UnicodeString> unicode_string =
66 2772 : Intl::SetTextToBreakIterator(isolate, text, break_iterator);
67 2772 : segment_iterator->set_unicode_string(unicode_string);
68 :
69 : // 4. Let iterator.[[SegmentIteratorIndex]] be 0.
70 : // Step 4 needs no field of its own: the index is the break iterator's current position (see Index()).
71 :
72 : // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
73 2772 : segment_iterator->set_is_break_type_set(false);
74 :
75 2772 : return segment_iterator;
76 : }
77 :
78 : // ecma402 #sec-segment-iterator-prototype-breakType
// Returns the break type of the boundary the iterator currently sits on:
//   - undefined while is_break_type_set() is false (the state Create() leaves
//     the iterator in);
//   - otherwise derived from the ICU rule status, interpreted per granularity:
//       GRAPHEME -> always undefined;
//       WORD     -> none_string for the UBRK_WORD_NONE range, word_string for
//                   the NUMBER / LETTER / KANA / IDEO ranges, else undefined;
//       SENTENCE -> term_string for the UBRK_SENTENCE_TERM range, sep_string
//                   for the UBRK_SENTENCE_SEP range, else undefined.
// COUNT is not a real granularity and hits UNREACHABLE().
79 53046 : Handle<Object> JSSegmentIterator::BreakType() const {
80 53046 : if (!is_break_type_set()) {
81 468 : return GetReadOnlyRoots().undefined_value_handle();
82 : }
83 105156 : icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
// The rule status is fetched once, before the switch, even for GRAPHEME where
// it is not consulted.
84 52578 : int32_t rule_status = break_iterator->getRuleStatus();
85 52578 : switch (granularity()) {
86 : case JSSegmenter::Granularity::GRAPHEME:
87 28998 : return GetReadOnlyRoots().undefined_value_handle();
88 : case JSSegmenter::Granularity::WORD:
89 22041 : if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
90 : // "words" that do not fit into any of other categories. Includes spaces
91 : // and most punctuation.
92 9387 : return GetReadOnlyRoots().none_string_handle();
93 : }
94 12654 : if ((rule_status >= UBRK_WORD_NUMBER &&
95 : rule_status < UBRK_WORD_NUMBER_LIMIT) ||
96 : (rule_status >= UBRK_WORD_LETTER &&
97 : rule_status < UBRK_WORD_LETTER_LIMIT) ||
98 : (rule_status >= UBRK_WORD_KANA &&
99 12654 : rule_status < UBRK_WORD_KANA_LIMIT) ||
100 : (rule_status >= UBRK_WORD_IDEO &&
101 : rule_status < UBRK_WORD_IDEO_LIMIT)) {
102 : // words that appear to be numbers, letters, kana characters,
103 : // ideographic characters, etc
104 12654 : return GetReadOnlyRoots().word_string_handle();
105 : }
// Rule status outside every range tested above.
106 0 : return GetReadOnlyRoots().undefined_value_handle();
107 : case JSSegmenter::Granularity::SENTENCE:
108 1539 : if (rule_status >= UBRK_SENTENCE_TERM &&
109 : rule_status < UBRK_SENTENCE_TERM_LIMIT) {
110 : // sentences ending with a sentence terminator ('.', '?', '!', etc.)
111 : // character, possibly followed by a hard separator (CR, LF, PS, etc.)
112 909 : return GetReadOnlyRoots().term_string_handle();
113 : }
114 630 : if ((rule_status >= UBRK_SENTENCE_SEP &&
115 : rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
116 : // sentences that do not contain an ending sentence terminator ('.',
117 : // '?', '!', etc.) character, but are ended only by a hard separator
118 : // (CR, LF, PS, etc.).
119 630 : return GetReadOnlyRoots().sep_string_handle();
120 : }
// Rule status outside both sentence ranges tested above.
121 0 : return GetReadOnlyRoots().undefined_value_handle();
122 : case JSSegmenter::Granularity::COUNT:
123 0 : UNREACHABLE();
124 : }
125 0 : }
126 :
127 : // ecma402 #sec-segment-iterator-prototype-index
// Returns the break iterator's current() position, boxed as a Number via
// NewNumberFromInt. The iterator object stores no separate index field; the
// ICU break iterator is the single source of truth. CHECK-fails if the
// managed break iterator's raw pointer is null.
128 100935 : Handle<Object> JSSegmentIterator::Index(
129 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
130 : icu::BreakIterator* icu_break_iterator =
131 201870 : segment_iterator->icu_break_iterator()->raw();
132 100935 : CHECK_NOT_NULL(icu_break_iterator);
133 100935 : return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
134 : }
135 :
136 : // ecma402 #sec-segment-iterator-prototype-next
// Advances the break iterator by one boundary and returns an iterator result
// object built by NewJSIteratorResult:
//   - if next() returns icu::BreakIterator::DONE: (undefined, done = true);
//   - otherwise: (result, done = false), where |result| is a fresh plain object
//     with three data properties keyed by segment_string, breakType_string and
//     index_string, holding the segment text between the previous and the new
//     position, BreakType(), and the new position.
// Returns an empty MaybeHandle if GetSegment() fails
// (ASSIGN_RETURN_ON_EXCEPTION).
// Unlike Index(), the raw break iterator pointer is not null-checked here.
137 17784 : MaybeHandle<JSReceiver> JSSegmentIterator::Next(
138 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
139 : Factory* factory = isolate->factory();
140 : icu::BreakIterator* icu_break_iterator =
141 : segment_iterator->icu_break_iterator()->raw();
142 : // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
143 17784 : int32_t prev = icu_break_iterator->current();
144 : // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
145 17784 : int32_t index = icu_break_iterator->next();
// The flag is set before the DONE check, so it becomes true even when the
// iteration is already exhausted.
146 17784 : segment_iterator->set_is_break_type_set(true);
147 17784 : if (index == icu::BreakIterator::DONE) {
148 : // 5. If done is true, return CreateIterResultObject(undefined, true).
149 : return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
150 864 : true);
151 : }
152 : // 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
153 16920 : Handle<Object> new_index = factory->NewNumberFromInt(index);
154 :
155 : // 8. Let segment be the substring of string from previousIndex to
156 : // newIndex, inclusive of previousIndex and exclusive of newIndex.
157 : Handle<String> segment;
158 33840 : ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
159 : segment_iterator->GetSegment(isolate, prev, index),
160 : JSReceiver);
161 :
162 : // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
163 16920 : Handle<Object> break_type = segment_iterator->BreakType();
164 :
165 : // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
166 16920 : Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
167 :
// The three CreateDataProperty calls below are wrapped in CHECK(...FromJust()):
// a failure to define a property on the freshly created object is treated as
// fatal rather than reported as a JS exception.
168 : // 11. Perform ! CreateDataProperty(result "segment", segment).
169 33840 : CHECK(JSReceiver::CreateDataProperty(isolate, result,
170 : factory->segment_string(), segment,
171 : Just(kDontThrow))
172 : .FromJust());
173 :
174 : // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
175 33840 : CHECK(JSReceiver::CreateDataProperty(isolate, result,
176 : factory->breakType_string(), break_type,
177 : Just(kDontThrow))
178 : .FromJust());
179 :
180 : // 13. Perform ! CreateDataProperty(result, "index", newIndex).
181 33840 : CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
182 : new_index, Just(kDontThrow))
183 : .FromJust());
184 :
185 : // 14. Return CreateIterResultObject(result, false).
186 16920 : return factory->NewJSIteratorResult(result, false);
187 : }
188 :
189 : // ecma402 #sec-segment-iterator-prototype-following
// Moves the iterator forwards.
//   - |from_obj| undefined: advances with next() and returns Just(true) iff
//     next() returned icu::BreakIterator::DONE.
//   - |from_obj| given: converts it with Object::ToIndex, requires
//     from < text length, repositions with following(from) and returns
//     Just(false).
// Returns Nothing<bool>() with a pending exception when ToIndex throws or when
// a RangeError (kParameterOfFunctionOutOfRange) is raised for |from|.
// On both successful paths the "break type is set" flag becomes true.
190 17748 : Maybe<bool> JSSegmentIterator::Following(
191 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
192 : Handle<Object> from_obj) {
193 : Factory* factory = isolate->factory();
194 : icu::BreakIterator* icu_break_iterator =
195 : segment_iterator->icu_break_iterator()->raw();
196 : // 3. If from is not undefined,
197 17748 : if (!from_obj->IsUndefined()) {
198 : // a. Let from be ? ToIndex(from).
199 : uint32_t from;
200 : Handle<Object> index;
201 126 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
202 : isolate, index,
203 : Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
204 : Nothing<bool>());
// NOTE(review): ToArrayIndex presumably fails when the ToIndex result does not
// fit a uint32 array index -- confirm. This RangeError reports the converted
// |index|, whereas the length check below reports the original |from_obj|.
205 108 : if (!index->ToArrayIndex(&from)) {
206 0 : THROW_NEW_ERROR_RETURN_VALUE(
207 : isolate,
208 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
209 : factory->NewStringFromStaticChars("from"),
210 : factory->NewStringFromStaticChars("following"), index),
211 : Nothing<bool>());
212 : }
213 : // b. Let length be the length of iterator.[[SegmentIteratorString]].
// The length is read from the break iterator's attached text, not from the
// iterator's unicode_string() field.
214 : uint32_t length =
215 54 : static_cast<uint32_t>(icu_break_iterator->getText().getLength());
216 :
217 : // c. If from ≥ length, throw a RangeError exception.
218 54 : if (from >= length) {
219 72 : THROW_NEW_ERROR_RETURN_VALUE(
220 : isolate,
221 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
222 : factory->NewStringFromStaticChars("from"),
223 : factory->NewStringFromStaticChars("following"),
224 : from_obj),
225 : Nothing<bool>());
226 : }
227 :
228 : // d. Let iterator.[[SegmentIteratorPosition]] be from.
// Per the ICU BreakIterator docs, following(offset) moves to the first
// boundary after the offset, so positioning and advancing happen in one call.
// Its return value is ignored and "not done" is reported unconditionally.
229 36 : segment_iterator->set_is_break_type_set(true);
230 36 : icu_break_iterator->following(from);
231 : return Just(false);
232 : }
233 : // 4. Return AdvanceSegmentIterator(iterator, forwards), which per the spec
234 : // returns true if direction is forwards and position is the length of
235 : // string. Here "done" is reported as true iff next() returns
236 : // icu::BreakIterator::DONE.
237 17685 : segment_iterator->set_is_break_type_set(true);
238 17685 : return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
239 : }
240 :
241 : // ecma402 #sec-segment-iterator-prototype-preceding
// Moves the iterator backwards; the mirror image of Following().
//   - |from_obj| undefined: steps back with previous() and returns Just(true)
//     iff previous() returned icu::BreakIterator::DONE.
//   - |from_obj| given: converts it with Object::ToIndex, requires
//     0 < from <= text length, repositions with preceding(from) and returns
//     Just(false).
// Returns Nothing<bool>() with a pending exception when ToIndex throws or when
// a RangeError (kParameterOfFunctionOutOfRange) is raised for |from|.
// On both successful paths the "break type is set" flag becomes true.
242 8919 : Maybe<bool> JSSegmentIterator::Preceding(
243 : Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
244 : Handle<Object> from_obj) {
245 : Factory* factory = isolate->factory();
246 : icu::BreakIterator* icu_break_iterator =
247 : segment_iterator->icu_break_iterator()->raw();
248 : // 3. If from is not undefined,
249 8919 : if (!from_obj->IsUndefined()) {
250 : // a. Let from be ? ToIndex(from).
251 : uint32_t from;
252 : Handle<Object> index;
253 1008 : ASSIGN_RETURN_ON_EXCEPTION_VALUE(
254 : isolate, index,
255 : Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
256 : Nothing<bool>());
257 :
// NOTE(review): ToArrayIndex presumably fails when the ToIndex result does not
// fit a uint32 array index -- confirm. This RangeError reports the converted
// |index|, whereas the range check below reports the original |from_obj|.
258 990 : if (!index->ToArrayIndex(&from)) {
259 0 : THROW_NEW_ERROR_RETURN_VALUE(
260 : isolate,
261 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
262 : factory->NewStringFromStaticChars("from"),
263 : factory->NewStringFromStaticChars("preceding"), index),
264 : Nothing<bool>());
265 : }
266 : // b. Let length be the length of iterator.[[SegmentIteratorString]].
// The length is read from the break iterator's attached text, not from the
// iterator's unicode_string() field.
267 : uint32_t length =
268 495 : static_cast<uint32_t>(icu_break_iterator->getText().getLength());
269 : // c. If from > length or from = 0, throw a RangeError exception.
// Unlike Following(), from == length is accepted here and from == 0 is not.
270 495 : if (from > length || from == 0) {
271 144 : THROW_NEW_ERROR_RETURN_VALUE(
272 : isolate,
273 : NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
274 : factory->NewStringFromStaticChars("from"),
275 : factory->NewStringFromStaticChars("preceding"),
276 : from_obj),
277 : Nothing<bool>());
278 : }
279 : // d. Let iterator.[[SegmentIteratorIndex]] be from.
// Per the ICU BreakIterator docs, preceding(offset) moves to the last boundary
// before the offset, so positioning and stepping back happen in one call.
// Its return value is ignored and "not done" is reported unconditionally.
280 459 : segment_iterator->set_is_break_type_set(true);
281 459 : icu_break_iterator->preceding(from);
282 : return Just(false);
283 : }
284 : // 4. Return AdvanceSegmentIterator(iterator, backwards), which per the spec
285 : // returns true if direction is backwards and position is 0. Here "done" is true iff previous() returns DONE.
286 8415 : segment_iterator->set_is_break_type_set(true);
287 8415 : return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
288 : }
289 :
290 : } // namespace internal
291 120216 : } // namespace v8
|