LCOV - code coverage report
Current view: top level - src/objects - js-segment-iterator.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 76 93 81.7 %
Date: 2019-01-20 Functions: 9 10 90.0 %

          Line data    Source code
       1             : // Copyright 2018 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_INTL_SUPPORT
       6             : #error Internationalization is expected to be enabled.
       7             : #endif  // V8_INTL_SUPPORT
       8             : 
       9             : #include "src/objects/js-segment-iterator.h"
      10             : 
      11             : #include <map>
      12             : #include <memory>
      13             : #include <string>
      14             : 
      15             : #include "src/heap/factory.h"
      16             : #include "src/isolate.h"
      17             : #include "src/objects-inl.h"
      18             : #include "src/objects/intl-objects.h"
      19             : #include "src/objects/js-segment-iterator-inl.h"
      20             : #include "src/objects/managed.h"
      21             : #include "unicode/brkiter.h"
      22             : 
      23             : namespace v8 {
      24             : namespace internal {
      25             : 
      26       20214 : MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
      27             :                                                   int32_t start,
      28             :                                                   int32_t end) const {
      29       40428 :   return Intl::ToString(isolate, *(unicode_string()->raw()), start, end);
      30             : }
      31             : 
      32           0 : Handle<String> JSSegmentIterator::GranularityAsString() const {
      33           0 :   switch (granularity()) {
      34             :     case JSSegmenter::Granularity::GRAPHEME:
      35           0 :       return GetReadOnlyRoots().grapheme_string_handle();
      36             :     case JSSegmenter::Granularity::WORD:
      37           0 :       return GetReadOnlyRoots().word_string_handle();
      38             :     case JSSegmenter::Granularity::SENTENCE:
      39           0 :       return GetReadOnlyRoots().sentence_string_handle();
      40             :     case JSSegmenter::Granularity::LINE:
      41           0 :       return GetReadOnlyRoots().line_string_handle();
      42             :     case JSSegmenter::Granularity::COUNT:
      43           0 :       UNREACHABLE();
      44             :   }
      45           0 : }
      46             : 
      47        4320 : MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
      48             :     Isolate* isolate, icu::BreakIterator* break_iterator,
      49             :     JSSegmenter::Granularity granularity, Handle<String> text) {
      50        4320 :   CHECK_NOT_NULL(break_iterator);
      51             :   // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
      52             :   Handle<Map> map = Handle<Map>(
      53       12960 :       isolate->native_context()->intl_segment_iterator_map(), isolate);
      54        4320 :   Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
      55             : 
      56             :   Handle<JSSegmentIterator> segment_iterator =
      57        4320 :       Handle<JSSegmentIterator>::cast(result);
      58             : 
      59             :   segment_iterator->set_flags(0);
      60        4320 :   segment_iterator->set_granularity(granularity);
      61             :   // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
      62             :   Handle<Managed<icu::BreakIterator>> managed_break_iterator =
      63        4320 :       Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
      64        4320 :   segment_iterator->set_icu_break_iterator(*managed_break_iterator);
      65             : 
      66             :   // 3. Let iterator.[[SegmentIteratorString]] be string.
      67             :   Managed<icu::UnicodeString> unicode_string =
      68        4320 :       Intl::SetTextToBreakIterator(isolate, text, break_iterator);
      69        4320 :   segment_iterator->set_unicode_string(unicode_string);
      70             : 
      71             :   // 4. Let iterator.[[SegmentIteratorIndex]] be 0.
      72             :   // step 4 is stored inside break_iterator.
      73             : 
      74             :   // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
      75        4320 :   segment_iterator->set_is_break_type_set(false);
      76             : 
      77        4320 :   return segment_iterator;
      78             : }
      79             : 
      80             : // ecma402 #sec-segment-iterator-prototype-breakType
      81       68022 : Handle<Object> JSSegmentIterator::BreakType() const {
      82       68022 :   if (!is_break_type_set()) {
      83        1242 :     return GetReadOnlyRoots().undefined_value_handle();
      84             :   }
      85      134802 :   icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
      86       67401 :   int32_t rule_status = break_iterator->getRuleStatus();
      87       67401 :   switch (granularity()) {
      88             :     case JSSegmenter::Granularity::GRAPHEME:
      89       57996 :       return GetReadOnlyRoots().undefined_value_handle();
      90             :     case JSSegmenter::Granularity::WORD:
      91       22041 :       if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
      92             :         // "words" that do not fit into any of other categories. Includes spaces
      93             :         // and most punctuation.
      94       18774 :         return GetReadOnlyRoots().none_string_handle();
      95             :       }
      96       12654 :       if ((rule_status >= UBRK_WORD_NUMBER &&
      97             :            rule_status < UBRK_WORD_NUMBER_LIMIT) ||
      98             :           (rule_status >= UBRK_WORD_LETTER &&
      99             :            rule_status < UBRK_WORD_LETTER_LIMIT) ||
     100             :           (rule_status >= UBRK_WORD_KANA &&
     101       12654 :            rule_status < UBRK_WORD_KANA_LIMIT) ||
     102             :           (rule_status >= UBRK_WORD_IDEO &&
     103             :            rule_status < UBRK_WORD_IDEO_LIMIT)) {
     104             :         // words that appear to be numbers, letters, kana characters,
     105             :         // ideographic characters, etc
     106       25308 :         return GetReadOnlyRoots().word_string_handle();
     107             :       }
     108           0 :       return GetReadOnlyRoots().undefined_value_handle();
     109             :     case JSSegmenter::Granularity::LINE:
     110       14823 :       if (rule_status >= UBRK_LINE_SOFT && rule_status < UBRK_LINE_SOFT_LIMIT) {
     111             :         // soft line breaks, index at which a line break is acceptable but
     112             :         // not required
     113       29646 :         return GetReadOnlyRoots().soft_string_handle();
     114             :       }
     115           0 :       if ((rule_status >= UBRK_LINE_HARD &&
     116             :            rule_status < UBRK_LINE_HARD_LIMIT)) {
     117             :         // hard, or mandatory line breaks
     118           0 :         return GetReadOnlyRoots().hard_string_handle();
     119             :       }
     120           0 :       return GetReadOnlyRoots().undefined_value_handle();
     121             :     case JSSegmenter::Granularity::SENTENCE:
     122        1539 :       if (rule_status >= UBRK_SENTENCE_TERM &&
     123             :           rule_status < UBRK_SENTENCE_TERM_LIMIT) {
     124             :         // sentences ending with a sentence terminator ('.', '?', '!', etc.)
     125             :         // character, possibly followed by a hard separator (CR, LF, PS, etc.)
     126        1818 :         return GetReadOnlyRoots().term_string_handle();
     127             :       }
     128         630 :       if ((rule_status >= UBRK_SENTENCE_SEP &&
     129             :            rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
     130             :         // sentences that do not contain an ending sentence terminator ('.',
     131             :         // '?', '!', etc.) character, but are ended only by a hard separator
     132             :         // (CR, LF, PS, etc.) hard, or mandatory line breaks
     133        1260 :         return GetReadOnlyRoots().sep_string_handle();
     134             :       }
     135           0 :       return GetReadOnlyRoots().undefined_value_handle();
     136             :     case JSSegmenter::Granularity::COUNT:
     137           0 :       UNREACHABLE();
     138             :   }
     139           0 : }
     140             : 
     141             : // ecma402 #sec-segment-iterator-prototype-index
     142      120717 : Handle<Object> JSSegmentIterator::Index(
     143             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
     144             :   icu::BreakIterator* icu_break_iterator =
     145      241434 :       segment_iterator->icu_break_iterator()->raw();
     146      120717 :   CHECK_NOT_NULL(icu_break_iterator);
     147      120717 :   return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
     148             : }
     149             : 
     150             : // ecma402 #sec-segment-iterator-prototype-next
     151       21366 : MaybeHandle<JSReceiver> JSSegmentIterator::Next(
     152             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
     153             :   Factory* factory = isolate->factory();
     154             :   icu::BreakIterator* icu_break_iterator =
     155       42732 :       segment_iterator->icu_break_iterator()->raw();
     156             :   // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
     157       21366 :   int32_t prev = icu_break_iterator->current();
     158             :   // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
     159       21366 :   int32_t index = icu_break_iterator->next();
     160       21366 :   segment_iterator->set_is_break_type_set(true);
     161       21366 :   if (index == icu::BreakIterator::DONE) {
     162             :     // 5. If done is true, return CreateIterResultObject(undefined, true).
     163             :     return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
     164        1152 :                                         true);
     165             :   }
     166             :   // 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
     167       20214 :   Handle<Object> new_index = factory->NewNumberFromInt(index);
     168             : 
     169             :   // 8. Let segment be the substring of string from previousIndex to
     170             :   // newIndex, inclusive of previousIndex and exclusive of newIndex.
     171             :   Handle<String> segment;
     172       40428 :   ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
     173             :                              segment_iterator->GetSegment(isolate, prev, index),
     174             :                              JSReceiver);
     175             : 
     176             :   // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
     177       20214 :   Handle<Object> break_type = segment_iterator->BreakType();
     178             : 
     179             :   // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
     180       20214 :   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
     181             : 
     182             :   // 11. Perform ! CreateDataProperty(result "segment", segment).
     183       40428 :   CHECK(JSReceiver::CreateDataProperty(
     184             :             isolate, result, factory->segment_string(), segment, kDontThrow)
     185             :             .FromJust());
     186             : 
     187             :   // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
     188       40428 :   CHECK(JSReceiver::CreateDataProperty(isolate, result,
     189             :                                        factory->breakType_string(), break_type,
     190             :                                        kDontThrow)
     191             :             .FromJust());
     192             : 
     193             :   // 13. Perform ! CreateDataProperty(result, "index", newIndex).
     194       40428 :   CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
     195             :                                        new_index, kDontThrow)
     196             :             .FromJust());
     197             : 
     198             :   // 14. Return CreateIterResultObject(result, false).
     199       20214 :   return factory->NewJSIteratorResult(result, false);
     200             : }
     201             : 
     202             : // ecma402 #sec-segment-iterator-prototype-following
     203       27108 : Maybe<bool> JSSegmentIterator::Following(
     204             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
     205             :     Handle<Object> from_obj) {
     206             :   Factory* factory = isolate->factory();
     207             :   icu::BreakIterator* icu_break_iterator =
     208       54216 :       segment_iterator->icu_break_iterator()->raw();
     209             :   // 3. If from is not undefined,
     210       54216 :   if (!from_obj->IsUndefined()) {
     211             :     // a. Let from be ? ToIndex(from).
     212             :     uint32_t from;
     213             :     Handle<Object> index;
     214         126 :     ASSIGN_RETURN_ON_EXCEPTION_VALUE(
     215             :         isolate, index,
     216             :         Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
     217             :         Nothing<bool>());
     218         108 :     if (!index->ToArrayIndex(&from)) {
     219           0 :       THROW_NEW_ERROR_RETURN_VALUE(
     220             :           isolate,
     221             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     222             :                         factory->NewStringFromStaticChars("from"),
     223             :                         factory->NewStringFromStaticChars("following"), index),
     224             :           Nothing<bool>());
     225             :     }
     226             :     // b. Let length be the length of iterator.[[SegmentIteratorString]].
     227             :     uint32_t length =
     228          54 :         static_cast<uint32_t>(icu_break_iterator->getText().getLength());
     229             : 
     230             :     // c. If from ≥ length, throw a RangeError exception.
     231          54 :     if (from >= length) {
     232          72 :       THROW_NEW_ERROR_RETURN_VALUE(
     233             :           isolate,
     234             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     235             :                         factory->NewStringFromStaticChars("from"),
     236             :                         factory->NewStringFromStaticChars("following"),
     237             :                         from_obj),
     238             :           Nothing<bool>());
     239             :     }
     240             : 
     241             :     // d. Let iterator.[[SegmentIteratorPosition]] be from.
     242          36 :     segment_iterator->set_is_break_type_set(true);
     243          36 :     icu_break_iterator->following(from);
     244             :     return Just(false);
     245             :   }
     246             :   // 4. return AdvanceSegmentIterator(iterator, forward).
     247             :   // 4. .... or if direction is backwards and position is 0, return true.
     248             :   // 4. If direction is forwards and position is the length of string ... return
     249             :   // true.
     250       27045 :   segment_iterator->set_is_break_type_set(true);
     251       27045 :   return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
     252             : }
     253             : 
     254             : // ecma402 #sec-segment-iterator-prototype-preceding
     255       10710 : Maybe<bool> JSSegmentIterator::Preceding(
     256             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
     257             :     Handle<Object> from_obj) {
     258             :   Factory* factory = isolate->factory();
     259             :   icu::BreakIterator* icu_break_iterator =
     260       21420 :       segment_iterator->icu_break_iterator()->raw();
     261             :   // 3. If from is not undefined,
     262       21420 :   if (!from_obj->IsUndefined()) {
     263             :     // a. Let from be ? ToIndex(from).
     264             :     uint32_t from;
     265             :     Handle<Object> index;
     266        1296 :     ASSIGN_RETURN_ON_EXCEPTION_VALUE(
     267             :         isolate, index,
     268             :         Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
     269             :         Nothing<bool>());
     270             : 
     271        1278 :     if (!index->ToArrayIndex(&from)) {
     272           0 :       THROW_NEW_ERROR_RETURN_VALUE(
     273             :           isolate,
     274             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     275             :                         factory->NewStringFromStaticChars("from"),
     276             :                         factory->NewStringFromStaticChars("preceding"), index),
     277             :           Nothing<bool>());
     278             :     }
     279             :     // b. Let length be the length of iterator.[[SegmentIteratorString]].
     280             :     uint32_t length =
     281         639 :         static_cast<uint32_t>(icu_break_iterator->getText().getLength());
     282             :     // c. If from > length or from = 0, throw a RangeError exception.
     283         639 :     if (from > length || from == 0) {
     284         144 :       THROW_NEW_ERROR_RETURN_VALUE(
     285             :           isolate,
     286             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     287             :                         factory->NewStringFromStaticChars("from"),
     288             :                         factory->NewStringFromStaticChars("preceding"),
     289             :                         from_obj),
     290             :           Nothing<bool>());
     291             :     }
     292             :     // d. Let iterator.[[SegmentIteratorIndex]] be from.
     293         603 :     segment_iterator->set_is_break_type_set(true);
     294         603 :     icu_break_iterator->preceding(from);
     295             :     return Just(false);
     296             :   }
     297             :   // 4. return AdvanceSegmentIterator(iterator, backwards).
     298             :   // 4. .... or if direction is backwards and position is 0, return true.
     299       10062 :   segment_iterator->set_is_break_type_set(true);
     300       10062 :   return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
     301             : }
     302             : 
     303             : }  // namespace internal
     304      183867 : }  // namespace v8

Generated by: LCOV version 1.10