LCOV - code coverage report
Current view: top level - src/objects - string.h (source / functions) Hit Total Coverage
Test: app.info Lines: 45 53 84.9 %
Date: 2019-03-21 Functions: 6 15 40.0 %

          Line data    Source code
       1             : // Copyright 2017 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_OBJECTS_STRING_H_
       6             : #define V8_OBJECTS_STRING_H_
       7             : 
       8             : #include "src/base/bits.h"
       9             : #include "src/objects/instance-type.h"
      10             : #include "src/objects/name.h"
      11             : #include "src/objects/smi.h"
      12             : #include "src/unicode-decoder.h"
      13             : 
      14             : // Has to be the last include (doesn't have include guards):
      15             : #include "src/objects/object-macros.h"
      16             : 
      17             : namespace v8 {
      18             : namespace internal {
      19             : 
      20             : enum InstanceType : uint16_t;
      21             : 
      22             : enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
      23             : enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
      24             : 
      25             : // The characteristics of a string are stored in its map.  Retrieving these
      26             : // few bits of information is moderately expensive, involving two memory
      27             : // loads where the second is dependent on the first.  To improve efficiency
      28             : // the shape of the string is given its own class so that it can be retrieved
      29             : // once and used for several string operations.  A StringShape is small enough
      30             : // to be passed by value and is immutable, but be aware that flattening a
      31             : // string can potentially alter its shape.  Also be aware that a GC caused by
      32             : // something else can alter the shape of a string due to ConsString
      33             : // shortcutting.  Keeping these restrictions in mind has proven to be error-
      34             : // prone and so we no longer put StringShapes in variables unless there is a
      35             : // concrete performance benefit at that particular point in the code.
      36             : class StringShape {
      37             :  public:
      38             :   inline explicit StringShape(const String s);
      39             :   inline explicit StringShape(Map s);
      40             :   inline explicit StringShape(InstanceType t);
      41             :   inline bool IsSequential();
      42             :   inline bool IsExternal();
      43             :   inline bool IsCons();
      44             :   inline bool IsSliced();
      45             :   inline bool IsThin();
      46             :   inline bool IsIndirect();
      47             :   inline bool IsExternalOneByte();
      48             :   inline bool IsExternalTwoByte();
      49             :   inline bool IsSequentialOneByte();
      50             :   inline bool IsSequentialTwoByte();
      51             :   inline bool IsInternalized();
      52             :   inline StringRepresentationTag representation_tag();
      53             :   inline uint32_t encoding_tag();
      54             :   inline uint32_t full_representation_tag();
      55             : #ifdef DEBUG
      56             :   inline uint32_t type() { return type_; }
      57             :   inline void invalidate() { valid_ = false; }
      58             :   inline bool valid() { return valid_; }
      59             : #else
      60             :   inline void invalidate() {}
      61             : #endif
      62             : 
      63             :  private:
      64             :   uint32_t type_;
      65             : #ifdef DEBUG
      66             :   inline void set_valid() { valid_ = true; }
      67             :   bool valid_;
      68             : #else
      69        1064 :   inline void set_valid() {}
      70             : #endif
      71             : };
      72             : 
      73             : // The String abstract class captures JavaScript string values:
      74             : //
      75             : // Ecma-262:
      76             : //  4.3.16 String Value
      77             : //    A string value is a member of the type String and is a finite
      78             : //    ordered sequence of zero or more 16-bit unsigned integer values.
      79             : //
      80             : // All string values have a length field.
      81             : class String : public Name {
      82             :  public:
      83             :   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
      84             : 
      85             :   // Representation of the flat content of a String.
      86             :   // A non-flat string doesn't have flat content.
      87             :   // A flat string has content that's encoded as a sequence of either
      88             :   // one-byte chars or two-byte UC16.
      89             :   // Returned by String::GetFlatContent().
      90             :   class FlatContent {
      91             :    public:
      92             :     // Returns true if the string is flat and this structure contains content.
      93             :     bool IsFlat() const { return state_ != NON_FLAT; }
      94             :     // Returns true if the structure contains one-byte content.
      95     1503228 :     bool IsOneByte() const { return state_ == ONE_BYTE; }
      96             :     // Returns true if the structure contains two-byte content.
      97             :     bool IsTwoByte() const { return state_ == TWO_BYTE; }
      98             : 
      99             :     // Return the one byte content of the string. Only use if IsOneByte()
     100             :     // returns true.
     101             :     Vector<const uint8_t> ToOneByteVector() const {
     102             :       DCHECK_EQ(ONE_BYTE, state_);
     103    32325499 :       return Vector<const uint8_t>(onebyte_start, length_);
     104             :     }
     105             :     // Return the two-byte content of the string. Only use if IsTwoByte()
     106             :     // returns true.
     107             :     Vector<const uc16> ToUC16Vector() const {
     108             :       DCHECK_EQ(TWO_BYTE, state_);
     109     6077803 :       return Vector<const uc16>(twobyte_start, length_);
     110             :     }
     111             : 
     112             :     uc16 Get(int i) const {
     113             :       DCHECK(i < length_);
     114             :       DCHECK(state_ != NON_FLAT);
     115    62191680 :       if (state_ == ONE_BYTE) return onebyte_start[i];
     116    15286027 :       return twobyte_start[i];
     117             :     }
     118             : 
     119             :     bool UsesSameString(const FlatContent& other) const {
     120             :       return onebyte_start == other.onebyte_start;
     121             :     }
     122             : 
     123             :    private:
     124             :     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
     125             : 
     126             :     // Constructors only used by String::GetFlatContent().
     127             :     explicit FlatContent(const uint8_t* start, int length)
     128             :         : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
     129             :     explicit FlatContent(const uc16* start, int length)
     130             :         : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
     131             :     FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
     132             : 
     133             :     union {
     134             :       const uint8_t* onebyte_start;
     135             :       const uc16* twobyte_start;
     136             :     };
     137             :     int length_;
     138             :     State state_;
     139             : 
     140             :     friend class String;
     141             :     friend class IterableSubString;
     142             :   };
     143             : 
     144             :   template <typename Char>
     145             :   V8_INLINE Vector<const Char> GetCharVector(
     146             :       const DisallowHeapAllocation& no_gc);
     147             : 
     148             :   // Get and set the length of the string.
     149             :   inline int length() const;
     150             :   inline void set_length(int value);
     151             : 
     152             :   // Get and set the length of the string using acquire loads and release
     153             :   // stores.
     154             :   inline int synchronized_length() const;
     155             :   inline void synchronized_set_length(int value);
     156             : 
     157             :   // Returns whether this string has only one-byte chars, i.e. all of them can
     158             :   // be one-byte encoded.  This might be the case even if the string is
     159             :   // two-byte.  Such strings may appear when the embedder prefers
     160             :   // two-byte external representations even for one-byte data.
     161             :   inline bool IsOneByteRepresentation() const;
     162             :   inline bool IsTwoByteRepresentation() const;
     163             : 
     164             :   // Cons and slices have an encoding flag that may not represent the actual
     165             :   // encoding of the underlying string.  This is taken into account here.
     166             :   // This function is static because that helps it get inlined.
     167             :   // Requires: string.IsFlat()
     168             :   static inline bool IsOneByteRepresentationUnderneath(String string);
     169             : 
     170             :   // Set an individual two byte char in the string.
     171             :   inline void Set(int index, uint16_t value);
     172             :   // Get individual two byte char in the string.  Repeated calls
     173             :   // to this method are not efficient unless the string is flat.
     174             :   V8_INLINE uint16_t Get(int index);
     175             : 
     176             :   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
     177             :   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
     178             : 
     179             :   // Flattens the string.  Checks first inline to see if it is
     180             :   // necessary.  Does nothing if the string is not a cons string.
     181             :   // Flattening allocates a sequential string with the same data as
     182             :   // the given string and mutates the cons string to a degenerate
     183             :   // form, where the first component is the new sequential string and
     184             :   // the second component is the empty string.  If allocation fails,
     185             :   // this function returns a failure.  If flattening succeeds, this
     186             :   // function returns the sequential string that is now the first
     187             :   // component of the cons string.
     188             :   //
     189             :   // Degenerate cons strings are handled specially by the garbage
     190             :   // collector (see IsShortcutCandidate).
     191             : 
     192             :   static inline Handle<String> Flatten(
     193             :       Isolate* isolate, Handle<String> string,
     194             :       AllocationType allocation = AllocationType::kYoung);
     195             : 
     196             :   // Tries to return the content of a flat string as a structure holding either
     197             :   // a flat vector of char or of uc16.
     198             :   // If the string isn't flat, and therefore doesn't have flat content, the
     199             :   // returned structure will report so, and can't provide a vector of either
     200             :   // kind.
     201             :   FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc);
     202             : 
     203             :   // Returns the parent of a sliced string or first part of a flat cons string.
     204             :   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
     205             :   inline String GetUnderlying();
     206             : 
     207             :   // String relational comparison, implemented according to ES6 section 7.2.11
     208             :   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
     209             :   // simple lexicographic ordering on sequences of code unit values. There is no
     210             :   // attempt to use the more complex, semantically oriented definitions of
     211             :   // character or string equality and collating order defined in the Unicode
     212             :   // specification. Therefore String values that are canonically equal according
     213             :   // to the Unicode standard could test as unequal. In effect this algorithm
     214             :   // assumes that both Strings are already in normalized form. Also, note that
     215             :   // for strings containing supplementary characters, lexicographic ordering on
     216             :   // sequences of UTF-16 code unit values differs from that on sequences of code
     217             :   // point values.
     218             :   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
     219             :                                                         Handle<String> x,
     220             :                                                         Handle<String> y);
     221             : 
     222             :   // Perform ES6 21.1.3.8, including checking arguments.
     223             :   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
     224             :                         Handle<Object> search, Handle<Object> position);
     225             :   // Perform string match of pattern on subject, starting at start index.
     226             :   // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
     227             :   // check any arguments.
     228             :   static int IndexOf(Isolate* isolate, Handle<String> receiver,
     229             :                      Handle<String> search, int start_index);
     230             : 
     231             :   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
     232             :                             Handle<Object> search, Handle<Object> position);
     233             : 
     234             :   // Encapsulates logic related to a match and its capture groups as required
     235             :   // by GetSubstitution.
     236        3894 :   class Match {
     237             :    public:
     238             :     virtual Handle<String> GetMatch() = 0;
     239             :     virtual Handle<String> GetPrefix() = 0;
     240             :     virtual Handle<String> GetSuffix() = 0;
     241             : 
     242             :     // A named capture can be invalid (if it is not specified in the pattern),
     243             :     // unmatched (specified but not matched in the current string), and matched.
     244             :     enum CaptureState { INVALID, UNMATCHED, MATCHED };
     245             : 
     246             :     virtual int CaptureCount() = 0;
     247             :     virtual bool HasNamedCaptures() = 0;
     248             :     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
     249             :     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
     250             :                                                 CaptureState* state) = 0;
     251             : 
     252        3894 :     virtual ~Match() = default;
     253             :   };
     254             : 
     255             :   // ES#sec-getsubstitution
     256             :   // GetSubstitution(matched, str, position, captures, replacement)
     257             :   // Expand the $-expressions in the string and return a new string with
     258             :   // the result.
     259             :   // A {start_index} can be passed to specify where to start scanning the
     260             :   // replacement string.
     261             :   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
     262             :       Isolate* isolate, Match* match, Handle<String> replacement,
     263             :       int start_index = 0);
     264             : 
     265             :   // String equality operations.
     266             :   inline bool Equals(String other);
     267             :   inline static bool Equals(Isolate* isolate, Handle<String> one,
     268             :                             Handle<String> two);
     269             :   bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
     270             : 
     271             :   // Dispatches to Is{One,Two}ByteEqualTo.
     272             :   template <typename Char>
     273             :   bool IsEqualTo(Vector<const Char> str);
     274             : 
     275             :   bool IsOneByteEqualTo(Vector<const uint8_t> str);
     276             :   bool IsTwoByteEqualTo(Vector<const uc16> str);
     277             : 
     278             :   // Return a UTF8 representation of the string.  The string is null
     279             :   // terminated but may optionally contain nulls.  Length is returned
     280             :   // in length_output if length_output is not a null pointer.  The string
     281             :   // should be nearly flat, otherwise the performance of this method may
     282             :   // be very slow (quadratic in the length).  Setting robustness_flag to
     283             :   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
     284             :   // handles unexpected data without causing assert failures and it does not
     285             :   // do any heap allocations.  This is useful when printing stack traces.
     286             :   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
     287             :                                     RobustnessFlag robustness_flag, int offset,
     288             :                                     int length, int* length_output = nullptr);
     289             :   std::unique_ptr<char[]> ToCString(
     290             :       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
     291             :       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
     292             :       int* length_output = nullptr);
     293             : 
     294             :   bool ComputeArrayIndex(uint32_t* index);
     295             : 
     296             :   // Externalization.
     297             :   bool MakeExternal(v8::String::ExternalStringResource* resource);
     298             :   bool MakeExternal(v8::String::ExternalOneByteStringResource* resource);
     299             :   bool SupportsExternalization();
     300             : 
     301             :   // Conversion.
     302             :   inline bool AsArrayIndex(uint32_t* index);
     303             :   uint32_t inline ToValidIndex(Object number);
     304             : 
     305             :   // Trimming.
     306             :   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
     307             :   static Handle<String> Trim(Isolate* isolate, Handle<String> string,
     308             :                              TrimMode mode);
     309             : 
     310             :   DECL_CAST(String)
     311             : 
     312             :   void PrintOn(FILE* out);
     313             : 
     314             :   // For use during stack traces.  Performs rudimentary sanity check.
     315             :   bool LooksValid();
     316             : 
     317             :   // Dispatched behavior.
     318             :   void StringShortPrint(StringStream* accumulator, bool show_details = true);
     319             :   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
     320             : #if defined(DEBUG) || defined(OBJECT_PRINT)
     321             :   char* ToAsciiArray();
     322             : #endif
     323             :   DECL_PRINTER(String)
     324             :   DECL_VERIFIER(String)
     325             : 
     326             :   inline bool IsFlat();
     327             : 
     328             :   DEFINE_FIELD_OFFSET_CONSTANTS(Name::kHeaderSize,
     329             :                                 TORQUE_GENERATED_STRING_FIELDS)
     330             : 
     331             :   static const int kHeaderSize = kSize;
     332             : 
     333             :   // Max char codes.
     334             :   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
     335             :   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
     336             :   static const int kMaxUtf16CodeUnit = 0xffff;
     337             :   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
     338             :   static const uc32 kMaxCodePoint = 0x10ffff;
     339             : 
     340             :   // Maximal string length.
     341             :   // The max length is different on 32 and 64 bit platforms. Max length for a
     342             :   // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
     343             :   // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
     344             :   // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
     345             :   // each char needs two bytes, subtract 24 bytes for the string header size.
     346             : 
     347             :   // See include/v8.h for the definition.
     348             :   static const int kMaxLength = v8::String::kMaxLength;
     349             :   static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kHeaderSize),
     350             :                 "Unexpected max String length");
     351             : 
     352             :   // Max length for computing hash. For strings longer than this limit the
     353             :   // string length is used as the hash value.
     354             :   static const int kMaxHashCalcLength = 16383;
     355             : 
     356             :   // Limit for truncation in short printing.
     357             :   static const int kMaxShortPrintLength = 1024;
     358             : 
     359             :   // Helper function for flattening strings.
     360             :   template <typename sinkchar>
     361             :   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
     362             : 
     363             :   // The return value may point to the first aligned word containing the first
     364             :   // non-one-byte character, rather than directly to the non-one-byte character.
     365             :   // If the return value is >= the passed length, the entire string was
     366             :   // one-byte.
     367    10328879 :   static inline int NonAsciiStart(const char* chars, int length) {
     368             :     const char* start = chars;
     369    10328879 :     const char* limit = chars + length;
     370             : 
     371    10328879 :     if (length >= kIntptrSize) {
     372             :       // Check unaligned bytes.
     373     9509328 :       while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
     374     3937153 :         if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     375         280 :           return static_cast<int>(chars - start);
     376             :         }
     377     3936873 :         ++chars;
     378             :       }
     379             :       // Check aligned words.
     380             :       DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
     381             :       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
     382   382444372 :       while (chars + sizeof(uintptr_t) <= limit) {
     383   380810634 :         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
     384        1564 :           return static_cast<int>(chars - start);
     385             :         }
     386             :         chars += sizeof(uintptr_t);
     387             :       }
     388             :     }
     389             :     // Check remaining unaligned bytes.
     390    83062257 :     while (chars < limit) {
     391    36371167 :       if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     392        3556 :         return static_cast<int>(chars - start);
     393             :       }
     394    36367611 :       ++chars;
     395             :     }
     396             : 
     397    10323479 :     return static_cast<int>(chars - start);
     398             :   }
     399             : 
     400             :   static inline bool IsAscii(const char* chars, int length) {
     401      120409 :     return NonAsciiStart(chars, length) >= length;
     402             :   }
     403             : 
     404             :   static inline bool IsAscii(const uint8_t* chars, int length) {
     405           0 :     return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
     406             :            length;
     407             :   }
     408             : 
     409             :   static inline int NonOneByteStart(const uc16* chars, int length) {
     410     1865057 :     const uc16* limit = chars + length;
     411             :     const uc16* start = chars;
     412  1527894094 :     while (chars < limit) {
     413  1526643568 :       if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
     414  1526029037 :       ++chars;
     415             :     }
     416     1250526 :     return static_cast<int>(chars - start);
     417             :   }
     418             : 
     419             :   static inline bool IsOneByte(const uc16* chars, int length) {
     420             :     return NonOneByteStart(chars, length) >= length;
     421             :   }
     422             : 
     423             :   template <class Visitor>
     424             :   static inline ConsString VisitFlat(Visitor* visitor, String string,
     425             :                                      int offset = 0);
     426             : 
     427             :   static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
     428             :                                               Handle<String> string,
     429             :                                               bool include_ending_line);
     430             : 
     431             :  private:
     432             :   friend class Name;
     433             :   friend class StringTableInsertionKey;
     434             :   friend class InternalizedStringKey;
     435             : 
     436             :   static Handle<String> SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
     437             :                                     AllocationType allocation);
     438             : 
     439             :   // Slow case of String::Equals.  This implementation works on any strings
     440             :   // but it is most efficient on strings that are almost flat.
     441             :   bool SlowEquals(String other);
     442             : 
     443             :   static bool SlowEquals(Isolate* isolate, Handle<String> one,
     444             :                          Handle<String> two);
     445             : 
     446             :   // Slow case of AsArrayIndex.
     447             :   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
     448             : 
     449             :   // Compute and set the hash code.
     450             :   uint32_t ComputeAndSetHash();
     451             : 
     452    14772841 :   OBJECT_CONSTRUCTORS(String, Name);
     453             : };
     454             : 
     455             : class SubStringRange {
     456             :  public:
     457             :   inline SubStringRange(String string, const DisallowHeapAllocation& no_gc,
     458             :                         int first = 0, int length = -1);
     459             :   class iterator;
     460             :   inline iterator begin();
     461             :   inline iterator end();
     462             : 
     463             :  private:
     464             :   String string_;
     465             :   int first_;
     466             :   int length_;
     467             :   const DisallowHeapAllocation& no_gc_;
     468             : };
     469             : 
     470             : // The SeqString abstract class captures sequential string values.
     471             : class SeqString : public String {
     472             :  public:
     473             :   DECL_CAST(SeqString)
     474             : 
     475             :   // Truncate the string in-place if possible and return the result.
     476             :   // In case of new_length == 0, the empty string is returned without
     477             :   // truncating the original string.
     478             :   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
     479             :                                                        int new_length);
     480             : 
     481             :   OBJECT_CONSTRUCTORS(SeqString, String);
     482             : };
     483             : 
     484             : class InternalizedString : public String {
     485             :  public:
     486             :   DECL_CAST(InternalizedString)
     487             :   // TODO(neis): Possibly move some stuff from String here.
     488             : 
     489             :   OBJECT_CONSTRUCTORS(InternalizedString, String);
     490             : };
     491             : 
     492             : // The OneByteString class captures sequential one-byte string objects.
     493             : // Each character in the OneByteString is a one-byte character.
     494             : class SeqOneByteString : public SeqString {
     495             :  public:
     496             :   static const bool kHasOneByteEncoding = true;
     497             : 
     498             :   // Dispatched behavior.
     499             :   inline uint16_t SeqOneByteStringGet(int index);
     500             :   inline void SeqOneByteStringSet(int index, uint16_t value);
     501             : 
     502             :   // Get the address of the characters in this string.
     503             :   inline Address GetCharsAddress();
     504             : 
     505             :   inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
     506             : 
     507             :   // Clear uninitialized padding space. This ensures that the snapshot content
     508             :   // is deterministic.
     509             :   void clear_padding();
     510             : 
     511             :   DECL_CAST(SeqOneByteString)
     512             : 
     513             :   // Garbage collection support.  This method is called by the
     514             :   // garbage collector to compute the actual size of a OneByteString
     515             :   // instance.
     516             :   inline int SeqOneByteStringSize(InstanceType instance_type);
     517             : 
     518             :   // Computes the size for a OneByteString instance of a given length.
     519         784 :   static int SizeFor(int length) {
     520   377922406 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
     521             :   }
     522             : 
     523             :   // Maximal memory usage for a single sequential one-byte string.
     524             :   static const int kMaxCharsSize = kMaxLength;
     525             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     526             :   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
     527             : 
     528             :   class BodyDescriptor;
     529             : 
     530           0 :   OBJECT_CONSTRUCTORS(SeqOneByteString, SeqString);
     531             : };
     532             : 
     533             : // The TwoByteString class captures sequential unicode string objects.
     534             : // Each character in the TwoByteString is a two-byte uint16_t.
     535             : class SeqTwoByteString : public SeqString {
     536             :  public:
     537             :   static const bool kHasOneByteEncoding = false;
     538             : 
     539             :   // Dispatched behavior.
     540             :   inline uint16_t SeqTwoByteStringGet(int index);
     541             :   inline void SeqTwoByteStringSet(int index, uint16_t value);
     542             : 
     543             :   // Get the address of the characters in this string.
     544             :   inline Address GetCharsAddress();
     545             : 
     546             :   inline uc16* GetChars(const DisallowHeapAllocation& no_gc);
     547             : 
     548             :   // Clear uninitialized padding space. This ensures that the snapshot content
     549             :   // is deterministic.
     550             :   void clear_padding();
     551             : 
     552             :   DECL_CAST(SeqTwoByteString)
     553             : 
     554             :   // Garbage collection support.  This method is called by the
     555             :   // garbage collector to compute the actual size of a TwoByteString
     556             :   // instance.
     557             :   inline int SeqTwoByteStringSize(InstanceType instance_type);
     558             : 
     559             :   // Computes the size for a TwoByteString instance of a given length.
     560         896 :   static int SizeFor(int length) {
     561   138304476 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
     562             :   }
     563             : 
     564             :   // Maximal memory usage for a single sequential two-byte string.
     565             :   static const int kMaxCharsSize = kMaxLength * 2;
     566             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     567             :   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
     568             :                 String::kMaxLength);
     569             : 
     570             :   class BodyDescriptor;
     571             : 
     572           0 :   OBJECT_CONSTRUCTORS(SeqTwoByteString, SeqString);
     573             : };
     574             : 
     575             : // The ConsString class describes string values built by using the
     576             : // addition operator on strings.  A ConsString is a pair where the
     577             : // first and second components are pointers to other string values.
     578             : // One or both components of a ConsString can be pointers to other
     579             : // ConsStrings, creating a binary tree of ConsStrings where the leaves
     580             : // are non-ConsString string values.  The string value represented by
     581             : // a ConsString can be obtained by concatenating the leaf string
     582             : // values in a left-to-right depth-first traversal of the tree.
     583             : class ConsString : public String {
     584             :  public:
     585             :   // First string of the cons cell.
     586             :   inline String first();
     587             :   // Doesn't check that the result is a string, even in debug mode.  This is
     588             :   // useful during GC where the mark bits confuse the checks.
     589             :   inline Object unchecked_first();
     590             :   inline void set_first(Isolate* isolate, String first,
     591             :                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     592             : 
     593             :   // Second string of the cons cell.
     594             :   inline String second();
     595             :   // Doesn't check that the result is a string, even in debug mode.  This is
     596             :   // useful during GC where the mark bits confuse the checks.
     597             :   inline Object unchecked_second();
     598             :   inline void set_second(Isolate* isolate, String second,
     599             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     600             : 
     601             :   // Dispatched behavior.
     602             :   V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
     603             : 
     604             :   DECL_CAST(ConsString)
     605             : 
     606             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     607             :                                 TORQUE_GENERATED_CONS_STRING_FIELDS)
     608             : 
     609             :   // Minimum length for a cons string.
     610             :   static const int kMinLength = 13;
     611             : 
     612             :   typedef FixedBodyDescriptor<kFirstOffset, kSize, kSize> BodyDescriptor;
     613             : 
     614             :   DECL_VERIFIER(ConsString)
     615             : 
     616           0 :   OBJECT_CONSTRUCTORS(ConsString, String);
     617             : };
     618             : 
     619             : // The ThinString class describes string objects that are just references
     620             : // to another string object. They are used for in-place internalization when
     621             : // the original string cannot actually be internalized in-place: in these
     622             : // cases, the original string is converted to a ThinString pointing at its
     623             : // internalized version (which is allocated as a new object).
     624             : // In terms of memory layout and most algorithms operating on strings,
     625             : // ThinStrings can be thought of as "one-part cons strings".
     626             : class ThinString : public String {
     627             :  public:
     628             :   // Actual string that this ThinString refers to.
     629             :   inline String actual() const;
     630             :   inline HeapObject unchecked_actual() const;
     631             :   inline void set_actual(String s,
     632             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     633             : 
     634             :   V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
     635             : 
     636             :   DECL_CAST(ThinString)
     637             :   DECL_VERIFIER(ThinString)
     638             : 
     639             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     640             :                                 TORQUE_GENERATED_THIN_STRING_FIELDS)
     641             : 
     642             :   typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor;
     643             : 
     644           0 :   OBJECT_CONSTRUCTORS(ThinString, String);
     645             : };
     646             : 
     647             : // The Sliced String class describes strings that are substrings of another
     648             : // sequential string.  The motivation is to save time and memory when creating
     649             : // a substring.  A Sliced String is described as a pointer to the parent,
     650             : // the offset from the start of the parent string and the length.  Using
     651             : // a Sliced String therefore requires unpacking of the parent string and
     652             : // adding the offset to the start address.  Substrings of a Sliced String
     653             : // are not nested since the double indirection is simplified when creating
     654             : // such a substring.
     655             : // Currently missing features are:
     656             : //  - handling externalized parent strings
     657             : //  - external strings as parent
     658             : //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
     659             : class SlicedString : public String {
     660             :  public:
     661             :   inline String parent();
     662             :   inline void set_parent(Isolate* isolate, String parent,
     663             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     664             :   inline int offset() const;
     665             :   inline void set_offset(int offset);
     666             : 
     667             :   // Dispatched behavior.
     668             :   V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
     669             : 
     670             :   DECL_CAST(SlicedString)
     671             : 
     672             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     673             :                                 TORQUE_GENERATED_SLICED_STRING_FIELDS)
     674             : 
     675             :   // Minimum length for a sliced string.
     676             :   static const int kMinLength = 13;
     677             : 
     678             :   typedef FixedBodyDescriptor<kParentOffset, kSize, kSize> BodyDescriptor;
     679             : 
     680             :   DECL_VERIFIER(SlicedString)
     681             : 
     682           0 :   OBJECT_CONSTRUCTORS(SlicedString, String);
     683             : };
     684             : 
     685             : // The ExternalString class describes string values that are backed by
     686             : // a string resource that lies outside the V8 heap.  ExternalStrings
     687             : // consist of the length field common to all strings and a pointer to the
     688             : // external resource.  It is important to ensure (externally) that the
     689             : // resource is not deallocated while the ExternalString is live in the
     690             : // V8 heap.
     691             : //
     692             : // The API expects that all ExternalStrings are created through the
     693             : // API.  Therefore, ExternalStrings should not be used internally.
     694             : class ExternalString : public String {
     695             :  public:
     696             :   DECL_CAST(ExternalString)
     697             : 
     698             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     699             :                                 TORQUE_GENERATED_EXTERNAL_STRING_FIELDS)
     700             : 
     701             :   // Size of uncached external strings.
     702             :   static const int kUncachedSize =
     703             :       kResourceOffset + FIELD_SIZE(kResourceOffset);
     704             : 
     705             :   // Return whether the external string data pointer is not cached.
     706             :   inline bool is_uncached() const;
     707             :   // Size in bytes of the external payload.
     708             :   int ExternalPayloadSize() const;
     709             : 
     710             :   // Used in the serializer/deserializer.
     711             :   inline Address resource_as_address();
     712             :   inline void set_address_as_resource(Address address);
     713             :   inline uint32_t resource_as_uint32();
     714             :   inline void set_uint32_as_resource(uint32_t value);
     715             : 
     716             :   // Disposes string's resource object if it has not already been disposed.
     717             :   inline void DisposeResource();
     718             : 
     719             :   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
     720             : 
     721             :   OBJECT_CONSTRUCTORS(ExternalString, String);
     722             : };
     723             : 
     724             : // The ExternalOneByteString class is an external string backed by a
     725             : // one-byte string.
     726             : class ExternalOneByteString : public ExternalString {
     727             :  public:
     728             :   static const bool kHasOneByteEncoding = true;
     729             : 
     730             :   typedef v8::String::ExternalOneByteStringResource Resource;
     731             : 
     732             :   // The underlying resource.
     733             :   inline const Resource* resource();
     734             : 
     735             :   // It is assumed that the previous resource is null. If it is not null, then
     736             :   // it is the responsibility of the caller to handle the previous resource.
     737             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     738             :   // Used only during serialization.
     739             :   inline void set_resource(const Resource* buffer);
     740             : 
     741             :   // Update the pointer cache to the external character array.
     742             :   // The cached pointer is always valid, as the external character array does
     743             :   // not move during lifetime.  Deserialization is the only exception, after
     744             :   // which the pointer cache has to be refreshed.
     745             :   inline void update_data_cache();
     746             : 
     747             :   inline const uint8_t* GetChars();
     748             : 
     749             :   // Dispatched behavior.
     750             :   inline uint16_t ExternalOneByteStringGet(int index);
     751             : 
     752             :   DECL_CAST(ExternalOneByteString)
     753             : 
     754             :   class BodyDescriptor;
     755             : 
     756           0 :   OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
     757             : };
     758             : 
     759             : // The ExternalTwoByteString class is an external string backed by a UTF-16
     760             : // encoded string.
     761             : class ExternalTwoByteString : public ExternalString {
     762             :  public:
     763             :   static const bool kHasOneByteEncoding = false;
     764             : 
     765             :   typedef v8::String::ExternalStringResource Resource;
     766             : 
     767             :   // The underlying string resource.
     768             :   inline const Resource* resource();
     769             : 
     770             :   // It is assumed that the previous resource is null. If it is not null, then
     771             :   // it is the responsibility of the caller to handle the previous resource.
     772             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     773             :   // Used only during serialization.
     774             :   inline void set_resource(const Resource* buffer);
     775             : 
     776             :   // Update the pointer cache to the external character array.
     777             :   // The cached pointer is always valid, as the external character array does
     778             :   // not move during lifetime.  Deserialization is the only exception, after
     779             :   // which the pointer cache has to be refreshed.
     780             :   inline void update_data_cache();
     781             : 
     782             :   inline const uint16_t* GetChars();
     783             : 
     784             :   // Dispatched behavior.
     785             :   inline uint16_t ExternalTwoByteStringGet(int index);
     786             : 
     787             :   // For regexp code.
     788             :   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
     789             : 
     790             :   DECL_CAST(ExternalTwoByteString)
     791             : 
     792             :   class BodyDescriptor;
     793             : 
     794           0 :   OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
     795             : };
     796             : 
     797             : // A flat string reader provides random access to the contents of a
     798             : // string independent of the character width of the string.  The handle
     799             : // must be valid as long as the reader is being used.
     800     3007358 : class FlatStringReader : public Relocatable {
     801             :  public:
     802             :   FlatStringReader(Isolate* isolate, Handle<String> str);
     803             :   FlatStringReader(Isolate* isolate, Vector<const char> input);
     804             :   void PostGarbageCollection() override;
     805             :   inline uc32 Get(int index);
     806             :   template <typename Char>
     807             :   inline Char Get(int index);
     808             :   int length() { return length_; }
     809             : 
     810             :  private:
     811             :   Address* str_;
     812             :   bool is_one_byte_;
     813             :   int length_;
     814             :   const void* start_;
     815             : };
     816             : 
     817             : // This maintains an off-stack representation of the stack frames required
     818             : // to traverse a ConsString, allowing an entirely iterative and restartable
     819             : // traversal of the entire string.
     820             : class ConsStringIterator {
     821             :  public:
     822     6430331 :   inline ConsStringIterator() = default;
     823       66924 :   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
     824             :     Reset(cons_string, offset);
     825       66924 :   }
     826             :   inline void Reset(ConsString cons_string, int offset = 0) {
     827    11784827 :     depth_ = 0;
     828             :     // Next will always return a default-constructed String.
     829    11749160 :     if (cons_string.is_null()) return;
     830      108627 :     Initialize(cons_string, offset);
     831             :   }
     832             :   // Returns a default-constructed String when complete.
     833             :   inline String Next(int* offset_out) {
     834    71466319 :     *offset_out = 0;
     835    71466319 :     if (depth_ == 0) return String();
     836    60940358 :     return Continue(offset_out);
     837             :   }
     838             : 
     839             :  private:
     840             :   static const int kStackSize = 32;
     841             :   // Use a mask instead of doing modulo operations for stack wrapping.
     842             :   static const int kDepthMask = kStackSize - 1;
     843             :   static_assert(base::bits::IsPowerOfTwo(kStackSize),
     844             :                 "kStackSize must be power of two");
     845             :   static inline int OffsetForDepth(int depth);
     846             : 
     847             :   inline void PushLeft(ConsString string);
     848             :   inline void PushRight(ConsString string);
     849             :   inline void AdjustMaximumDepth();
     850             :   inline void Pop();
     851   122115196 :   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
     852             :   void Initialize(ConsString cons_string, int offset);
     853             :   String Continue(int* offset_out);
     854             :   String NextLeaf(bool* blew_stack);
     855             :   String Search(int* offset_out);
     856             : 
     857             :   // Stack must always contain only frames for which right traversal
     858             :   // has not yet been performed.
     859             :   ConsString frames_[kStackSize];
     860             :   ConsString root_;
     861             :   int depth_;
     862             :   int maximum_depth_;
     863             :   int consumed_;
     864             :   DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
     865             : };
     866             : 
     867             : class StringCharacterStream {
     868             :  public:
     869             :   inline explicit StringCharacterStream(String string, int offset = 0);
     870             :   inline uint16_t GetNext();
     871             :   inline bool HasMore();
     872             :   inline void Reset(String string, int offset = 0);
     873             :   inline void VisitOneByteString(const uint8_t* chars, int length);
     874             :   inline void VisitTwoByteString(const uint16_t* chars, int length);
     875             : 
     876             :  private:
     877             :   ConsStringIterator iter_;
     878             :   bool is_one_byte_;
     879             :   union {
     880             :     const uint8_t* buffer8_;
     881             :     const uint16_t* buffer16_;
     882             :   };
     883             :   const uint8_t* end_;
     884             :   DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
     885             : };
     886             : 
     887             : }  // namespace internal
     888             : }  // namespace v8
     889             : 
     890             : #include "src/objects/object-macros-undef.h"
     891             : 
     892             : #endif  // V8_OBJECTS_STRING_H_

Generated by: LCOV version 1.10