LCOV - code coverage report
Current view: top level - src/objects - js-segment-iterator.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 70 80 87.5 %
Date: 2019-03-21 Functions: 8 9 88.9 %

          Line data    Source code
       1             : // Copyright 2018 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_INTL_SUPPORT
       6             : #error Internationalization is expected to be enabled.
       7             : #endif  // V8_INTL_SUPPORT
       8             : 
       9             : #include "src/objects/js-segment-iterator.h"
      10             : 
      11             : #include <map>
      12             : #include <memory>
      13             : #include <string>
      14             : 
      15             : #include "src/heap/factory.h"
      16             : #include "src/isolate.h"
      17             : #include "src/objects-inl.h"
      18             : #include "src/objects/intl-objects.h"
      19             : #include "src/objects/js-segment-iterator-inl.h"
      20             : #include "src/objects/managed.h"
      21             : #include "unicode/brkiter.h"
      22             : 
      23             : namespace v8 {
      24             : namespace internal {
      25             : 
      26       16920 : MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
      27             :                                                   int32_t start,
      28             :                                                   int32_t end) const {
                         // Builds a String from the text held in unicode_string() by passing it,
                         // together with start and end, to Intl::ToString. Next() calls this with
                         // the break positions taken before and after advancing the ICU iterator.
                         // NOTE(review): presumably [start, end) is a half-open range of UTF-16
                         // code units -- confirm against Intl::ToString.
      29       33840 :   return Intl::ToString(isolate, *(unicode_string()->raw()), start, end);
      30             : }
      31             : 
      32           0 : Handle<String> JSSegmentIterator::GranularityAsString() const {
                         // Maps granularity() to the matching read-only root string handle
                         // (grapheme, word or sentence). This listing records zero hits for
                         // this function.
      33           0 :   switch (granularity()) {
      34             :     case JSSegmenter::Granularity::GRAPHEME:
      35             :       return GetReadOnlyRoots().grapheme_string_handle();
      36             :     case JSSegmenter::Granularity::WORD:
      37             :       return GetReadOnlyRoots().word_string_handle();
      38             :     case JSSegmenter::Granularity::SENTENCE:
      39             :       return GetReadOnlyRoots().sentence_string_handle();
      40             :     case JSSegmenter::Granularity::COUNT:
                             // NOTE(review): COUNT is treated as never stored on an iterator --
                             // presumably it is only the enum's size sentinel; confirm in
                             // JSSegmenter.
      41           0 :       UNREACHABLE();
      42             :   }
      43           0 : }
      44             : 
      45        2772 : MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
      46             :     Isolate* isolate, icu::BreakIterator* break_iterator,
      47             :     JSSegmenter::Granularity granularity, Handle<String> text) {
                         // Allocates a JSSegmentIterator from the native context's
                         // intl_segment_iterator_map and initializes it with the given
                         // granularity, the ICU break iterator and the text to segment.
                         // break_iterator must be non-null (checked below).
      48        2772 :   CHECK_NOT_NULL(break_iterator);
      49             :   // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
      50             :   Handle<Map> map = Handle<Map>(
      51        8316 :       isolate->native_context()->intl_segment_iterator_map(), isolate);
      52        2772 :   Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
      53             : 
      54             :   Handle<JSSegmentIterator> segment_iterator =
      55             :       Handle<JSSegmentIterator>::cast(result);
      56             : 
                         // Zero the whole flags field first, then store individual values.
                         // NOTE(review): presumably granularity and is_break_type_set are bit
                         // fields packed into flags -- confirm in js-segment-iterator.h.
      57             :   segment_iterator->set_flags(0);
      58        2772 :   segment_iterator->set_granularity(granularity);
      59             :   // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
                         // Here the slot holds the ICU break iterator wrapped in a Managed<>.
                         // NOTE(review): presumably the Managed takes ownership of the raw
                         // pointer and the literal 0 is a size estimate -- confirm against
                         // Managed<T>::FromRawPtr.
      60             :   Handle<Managed<icu::BreakIterator>> managed_break_iterator =
      61        2772 :       Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
      62        2772 :   segment_iterator->set_icu_break_iterator(*managed_break_iterator);
      63             : 
      64             :   // 3. Let iterator.[[SegmentIteratorString]] be string.
                         // SetTextToBreakIterator attaches text to break_iterator and returns
                         // the Managed icu::UnicodeString that is stored on the iterator;
                         // GetSegment() later reads segments out of that same object.
      65             :   Managed<icu::UnicodeString> unicode_string =
      66        2772 :       Intl::SetTextToBreakIterator(isolate, text, break_iterator);
      67        2772 :   segment_iterator->set_unicode_string(unicode_string);
      68             : 
      69             :   // 4. Let iterator.[[SegmentIteratorIndex]] be 0.
      70             :   // Step 4 needs no field of its own: the position lives inside
                         // break_iterator and Index() reads it back through current().
      71             : 
      72             :   // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
                         // BreakType() returns undefined for as long as this flag is false.
      73        2772 :   segment_iterator->set_is_break_type_set(false);
      74             : 
      75        2772 :   return segment_iterator;
      76             : }
      77             : 
      78             : // ecma402 #sec-segment-iterator-prototype-breakType
                       // Returns undefined while is_break_type_set() is false (Create() clears the
                       // flag; Next(), Following() and Preceding() set it). Otherwise classifies
                       // the ICU rule status of the break iterator according to granularity():
                       //   GRAPHEME -> always undefined
                       //   WORD     -> the none or word root string
                       //   SENTENCE -> the term or sep root string
                       // A rule status outside every tested range yields undefined.
      79       53046 : Handle<Object> JSSegmentIterator::BreakType() const {
      80       53046 :   if (!is_break_type_set()) {
      81         468 :     return GetReadOnlyRoots().undefined_value_handle();
      82             :   }
      83      105156 :   icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
      84       52578 :   int32_t rule_status = break_iterator->getRuleStatus();
      85       52578 :   switch (granularity()) {
      86             :     case JSSegmenter::Granularity::GRAPHEME:
      87       28998 :       return GetReadOnlyRoots().undefined_value_handle();
      88             :     case JSSegmenter::Granularity::WORD:
                             // Each category is a half-open range [UBRK_WORD_X, UBRK_WORD_X_LIMIT).
      89       22041 :       if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
      90             :         // "words" that do not fit into any of other categories. Includes spaces
      91             :         // and most punctuation.
      92        9387 :         return GetReadOnlyRoots().none_string_handle();
      93             :       }
      94       12654 :       if ((rule_status >= UBRK_WORD_NUMBER &&
      95             :            rule_status < UBRK_WORD_NUMBER_LIMIT) ||
      96             :           (rule_status >= UBRK_WORD_LETTER &&
      97             :            rule_status < UBRK_WORD_LETTER_LIMIT) ||
      98             :           (rule_status >= UBRK_WORD_KANA &&
      99       12654 :            rule_status < UBRK_WORD_KANA_LIMIT) ||
     100             :           (rule_status >= UBRK_WORD_IDEO &&
     101             :            rule_status < UBRK_WORD_IDEO_LIMIT)) {
     102             :         // words that appear to be numbers, letters, kana characters,
     103             :         // ideographic characters, etc
     104       12654 :         return GetReadOnlyRoots().word_string_handle();
     105             :       }
                             // Status matched none of the word ranges above (zero hits recorded in
                             // this listing).
     106           0 :       return GetReadOnlyRoots().undefined_value_handle();
     107             :     case JSSegmenter::Granularity::SENTENCE:
     108        1539 :       if (rule_status >= UBRK_SENTENCE_TERM &&
     109             :           rule_status < UBRK_SENTENCE_TERM_LIMIT) {
     110             :         // sentences ending with a sentence terminator ('.', '?', '!', etc.)
     111             :         // character, possibly followed by a hard separator (CR, LF, PS, etc.)
     112         909 :         return GetReadOnlyRoots().term_string_handle();
     113             :       }
     114         630 :       if ((rule_status >= UBRK_SENTENCE_SEP &&
     115             :            rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
     116             :         // sentences that do not contain an ending sentence terminator ('.',
     117             :         // '?', '!', etc.) character, but are ended only by a hard separator
     118             :         // (CR, LF, PS, etc.) hard, or mandatory line breaks
     119         630 :         return GetReadOnlyRoots().sep_string_handle();
     120             :       }
                             // Status matched neither sentence range (zero hits recorded in this
                             // listing).
     121           0 :       return GetReadOnlyRoots().undefined_value_handle();
     122             :     case JSSegmenter::Granularity::COUNT:
     123           0 :       UNREACHABLE();
     124             :   }
     125           0 : }
     126             : 
     127             : // ecma402 #sec-segment-iterator-prototype-index
                       // Returns the ICU break iterator's current() position as a Number allocated
                       // through the isolate's factory. The raw iterator pointer must be non-null.
     128      100935 : Handle<Object> JSSegmentIterator::Index(
     129             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
     130             :   icu::BreakIterator* icu_break_iterator =
     131      201870 :       segment_iterator->icu_break_iterator()->raw();
                         // NOTE(review): Next(), Following() and Preceding() dereference the same
                         // pointer without this check.
     132      100935 :   CHECK_NOT_NULL(icu_break_iterator);
     133      100935 :   return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
     134             : }
     135             : 
     136             : // ecma402 #sec-segment-iterator-prototype-next
                       // Advances the ICU break iterator by one boundary and returns an iterator
                       // result object. When next() reports DONE the result is (undefined, true).
                       // Otherwise the result is (obj, false), where obj is a fresh plain object
                       // with three data properties: the segment text between the previous and the
                       // new position, the break type, and the new index. An exception raised by
                       // GetSegment() is propagated to the caller.
     137       17784 : MaybeHandle<JSReceiver> JSSegmentIterator::Next(
     138             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
     139             :   Factory* factory = isolate->factory();
     140             :   icu::BreakIterator* icu_break_iterator =
     141             :       segment_iterator->icu_break_iterator()->raw();
     142             :   // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
     143       17784 :   int32_t prev = icu_break_iterator->current();
     144             :   // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
     145       17784 :   int32_t index = icu_break_iterator->next();
                         // The flag is raised before the DONE check, so it becomes true even
                         // when the iterator is already exhausted.
     146       17784 :   segment_iterator->set_is_break_type_set(true);
     147       17784 :   if (index == icu::BreakIterator::DONE) {
     148             :     // 5. If done is true, return CreateIterResultObject(undefined, true).
                           // NOTE(review): isolate->factory() is the same object as the local
                           // `factory` declared above.
     149             :     return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
     150         864 :                                         true);
     151             :   }
     152             :   // 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
     153       16920 :   Handle<Object> new_index = factory->NewNumberFromInt(index);
     154             : 
     155             :   // 8. Let segment be the substring of string from previousIndex to
     156             :   // newIndex, inclusive of previousIndex and exclusive of newIndex.
     157             :   Handle<String> segment;
     158       33840 :   ASSIGN_RETURN_ON_EXCEPTION(isolate, segment,
     159             :                              segment_iterator->GetSegment(isolate, prev, index),
     160             :                              JSReceiver);
     161             : 
     162             :   // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
     163       16920 :   Handle<Object> break_type = segment_iterator->BreakType();
     164             : 
     165             :   // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
     166       16920 :   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
     167             : 
                         // Steps 11-13 define properties on the fresh object created above; each
                         // call is wrapped in CHECK(...FromJust()), so a failure aborts rather
                         // than throws.
     168             :   // 11. Perform ! CreateDataProperty(result "segment", segment).
     169       33840 :   CHECK(JSReceiver::CreateDataProperty(isolate, result,
     170             :                                        factory->segment_string(), segment,
     171             :                                        Just(kDontThrow))
     172             :             .FromJust());
     173             : 
     174             :   // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
     175       33840 :   CHECK(JSReceiver::CreateDataProperty(isolate, result,
     176             :                                        factory->breakType_string(), break_type,
     177             :                                        Just(kDontThrow))
     178             :             .FromJust());
     179             : 
     180             :   // 13. Perform ! CreateDataProperty(result, "index", newIndex).
     181       33840 :   CHECK(JSReceiver::CreateDataProperty(isolate, result, factory->index_string(),
     182             :                                        new_index, Just(kDontThrow))
     183             :             .FromJust());
     184             : 
     185             :   // 14. Return CreateIterResultObject(result, false).
     186       16920 :   return factory->NewJSIteratorResult(result, false);
     187             : }
     188             : 
     189             : // ecma402 #sec-segment-iterator-prototype-following
                       // Moves the ICU break iterator forwards.
                       //   from_obj defined:   converts it with Object::ToIndex, requires
                       //                       from < text length, calls following(from) and
                       //                       returns Just(false).
                       //   from_obj undefined: calls next() and returns Just(true) exactly when it
                       //                       reports DONE.
                       // Returns Nothing<bool>() when the conversion fails or a RangeError is
                       // thrown. Every non-error path sets is_break_type_set to true.
     190       17748 : Maybe<bool> JSSegmentIterator::Following(
     191             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
     192             :     Handle<Object> from_obj) {
     193             :   Factory* factory = isolate->factory();
     194             :   icu::BreakIterator* icu_break_iterator =
     195             :       segment_iterator->icu_break_iterator()->raw();
     196             :   // 3. If from is not undefined,
     197       17748 :   if (!from_obj->IsUndefined()) {
     198             :     // a. Let from be ? ToIndex(from).
     199             :     uint32_t from;
     200             :     Handle<Object> index;
     201         126 :     ASSIGN_RETURN_ON_EXCEPTION_VALUE(
     202             :         isolate, index,
     203             :         Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
     204             :         Nothing<bool>());
                           // NOTE(review): presumably this branch is reached only when the
                           // ToIndex result does not fit a uint32 array index -- confirm. It has
                           // zero recorded hits in this listing. It reports the converted `index`,
                           // whereas the length check below reports the raw `from_obj`.
     205         108 :     if (!index->ToArrayIndex(&from)) {
     206           0 :       THROW_NEW_ERROR_RETURN_VALUE(
     207             :           isolate,
     208             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     209             :                         factory->NewStringFromStaticChars("from"),
     210             :                         factory->NewStringFromStaticChars("following"), index),
     211             :           Nothing<bool>());
     212             :     }
     213             :     // b. Let length be the length of iterator.[[SegmentIteratorString]].
                           // The length is read from the text currently attached to the ICU
                           // iterator.
     214             :     uint32_t length =
     215          54 :         static_cast<uint32_t>(icu_break_iterator->getText().getLength());
     216             : 
     217             :     // c. If from ≥ length, throw a RangeError exception.
     218          54 :     if (from >= length) {
     219          72 :       THROW_NEW_ERROR_RETURN_VALUE(
     220             :           isolate,
     221             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     222             :                         factory->NewStringFromStaticChars("from"),
     223             :                         factory->NewStringFromStaticChars("following"),
     224             :                         from_obj),
     225             :           Nothing<bool>());
     226             :     }
     227             : 
     228             :     // d. Let iterator.[[SegmentIteratorPosition]] be from.
                           // The value returned by following() is discarded; this path always
                           // reports "not done".
     229          36 :     segment_iterator->set_is_break_type_set(true);
     230          36 :     icu_break_iterator->following(from);
     231             :     return Just(false);
     232             :   }
     233             :   // 4. return AdvanceSegmentIterator(iterator, forward).
     234             :   // 4. .... or if direction is backwards and position is 0, return true.
     235             :   // 4. If direction is forwards and position is the length of string ... return
     236             :   // true.
     237       17685 :   segment_iterator->set_is_break_type_set(true);
     238       17685 :   return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
     239             : }
     240             : 
     241             : // ecma402 #sec-segment-iterator-prototype-preceding
                       // Moves the ICU break iterator backwards.
                       //   from_obj defined:   converts it with Object::ToIndex, requires
                       //                       0 < from <= text length, calls preceding(from) and
                       //                       returns Just(false).
                       //   from_obj undefined: calls previous() and returns Just(true) exactly
                       //                       when it reports DONE.
                       // Returns Nothing<bool>() when the conversion fails or a RangeError is
                       // thrown. Every non-error path sets is_break_type_set to true.
     242        8919 : Maybe<bool> JSSegmentIterator::Preceding(
     243             :     Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
     244             :     Handle<Object> from_obj) {
     245             :   Factory* factory = isolate->factory();
     246             :   icu::BreakIterator* icu_break_iterator =
     247             :       segment_iterator->icu_break_iterator()->raw();
     248             :   // 3. If from is not undefined,
     249        8919 :   if (!from_obj->IsUndefined()) {
     250             :     // a. Let from be ? ToIndex(from).
     251             :     uint32_t from;
     252             :     Handle<Object> index;
     253        1008 :     ASSIGN_RETURN_ON_EXCEPTION_VALUE(
     254             :         isolate, index,
     255             :         Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
     256             :         Nothing<bool>());
     257             : 
                           // NOTE(review): presumably this branch is reached only when the
                           // ToIndex result does not fit a uint32 array index -- confirm. It has
                           // zero recorded hits in this listing. It reports the converted `index`,
                           // whereas the range check below reports the raw `from_obj`.
     258         990 :     if (!index->ToArrayIndex(&from)) {
     259           0 :       THROW_NEW_ERROR_RETURN_VALUE(
     260             :           isolate,
     261             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     262             :                         factory->NewStringFromStaticChars("from"),
     263             :                         factory->NewStringFromStaticChars("preceding"), index),
     264             :           Nothing<bool>());
     265             :     }
     266             :     // b. Let length be the length of iterator.[[SegmentIteratorString]].
                           // The length is read from the text currently attached to the ICU
                           // iterator.
     267             :     uint32_t length =
     268         495 :         static_cast<uint32_t>(icu_break_iterator->getText().getLength());
     269             :     // c. If from > length or from = 0, throw a RangeError exception.
                           // Unlike Following(), from == length is accepted here and from == 0 is
                           // rejected.
     270         495 :     if (from > length || from == 0) {
     271         144 :       THROW_NEW_ERROR_RETURN_VALUE(
     272             :           isolate,
     273             :           NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
     274             :                         factory->NewStringFromStaticChars("from"),
     275             :                         factory->NewStringFromStaticChars("preceding"),
     276             :                         from_obj),
     277             :           Nothing<bool>());
     278             :     }
     279             :     // d. Let iterator.[[SegmentIteratorIndex]] be from.
                           // The value returned by preceding() is discarded; this path always
                           // reports "not done".
     280         459 :     segment_iterator->set_is_break_type_set(true);
     281         459 :     icu_break_iterator->preceding(from);
     282             :     return Just(false);
     283             :   }
     284             :   // 4. return AdvanceSegmentIterator(iterator, backwards).
     285             :   // 4. .... or if direction is backwards and position is 0, return true.
     286        8415 :   segment_iterator->set_is_break_type_set(true);
     287        8415 :   return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
     288             : }
     289             : 
     290             : }  // namespace internal
     291      120216 : }  // namespace v8

Generated by: LCOV version 1.10