LCOV - code coverage report
Current view: top level - src/objects - string.h (source / functions) Hit Total Coverage
Test: app.info Lines: 52 53 98.1 %
Date: 2019-01-20 Functions: 20 25 80.0 %

          Line data    Source code
       1             : // Copyright 2017 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_OBJECTS_STRING_H_
       6             : #define V8_OBJECTS_STRING_H_
       7             : 
       8             : #include "src/base/bits.h"
       9             : #include "src/objects/instance-type.h"
      10             : #include "src/objects/name.h"
      11             : #include "src/objects/smi.h"
      12             : #include "src/unicode-decoder.h"
      13             : 
      14             : // Has to be the last include (doesn't have include guards):
      15             : #include "src/objects/object-macros.h"
      16             : 
      17             : namespace v8 {
      18             : namespace internal {
      19             : 
      20             : enum InstanceType : uint16_t;
      21             : 
      22             : enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
      23             : enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
      24             : 
      25             : // The characteristics of a string are stored in its map.  Retrieving these
      26             : // few bits of information is moderately expensive, involving two memory
      27             : // loads where the second is dependent on the first.  To improve efficiency
      28             : // the shape of the string is given its own class so that it can be retrieved
      29             : // once and used for several string operations.  A StringShape is small enough
      30             : // to be passed by value and is immutable, but be aware that flattening a
      31             : // string can potentially alter its shape.  Also be aware that a GC caused by
      32             : // something else can alter the shape of a string due to ConsString
      33             : // shortcutting.  Keeping these restrictions in mind has proven to be error-
      34             : // prone and so we no longer put StringShapes in variables unless there is a
      35             : // concrete performance benefit at that particular point in the code.
      36             : class StringShape {
      37             :  public:
      38             :   inline explicit StringShape(const String s);
      39             :   inline explicit StringShape(Map s);
      40             :   inline explicit StringShape(InstanceType t);
      41             :   inline bool IsSequential();
      42             :   inline bool IsExternal();
      43             :   inline bool IsCons();
      44             :   inline bool IsSliced();
      45             :   inline bool IsThin();
      46             :   inline bool IsIndirect();
      47             :   inline bool IsExternalOneByte();
      48             :   inline bool IsExternalTwoByte();
      49             :   inline bool IsSequentialOneByte();
      50             :   inline bool IsSequentialTwoByte();
      51             :   inline bool IsInternalized();
      52             :   inline StringRepresentationTag representation_tag();
      53             :   inline uint32_t encoding_tag();
      54             :   inline uint32_t full_representation_tag();
      55             :   inline bool HasOnlyOneByteChars();
      56             : #ifdef DEBUG
      57             :   inline uint32_t type() { return type_; }
      58             :   inline void invalidate() { valid_ = false; }
      59             :   inline bool valid() { return valid_; }
      60             : #else
      61             :   inline void invalidate() {}
      62             : #endif
      63             : 
      64             :  private:
      65             :   uint32_t type_;
      66             : #ifdef DEBUG
      67             :   inline void set_valid() { valid_ = true; }
      68             :   bool valid_;
      69             : #else
      70             :   inline void set_valid() {}
      71             : #endif
      72             : };
      73             : 
      74             : // The String abstract class captures JavaScript string values:
      75             : //
      76             : // Ecma-262:
      77             : //  4.3.16 String Value
      78             : //    A string value is a member of the type String and is a finite
      79             : //    ordered sequence of zero or more 16-bit unsigned integer values.
      80             : //
      81             : // All string values have a length field.
      82             : class String : public Name {
      83             :  public:
      84             :   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
      85             : 
      86             :   // Representation of the flat content of a String.
      87             :   // A non-flat string doesn't have flat content.
      88             :   // A flat string has content that's encoded as a sequence of either
      89             :   // one-byte chars or two-byte UC16.
      90             :   // Returned by String::GetFlatContent().
      91             :   class FlatContent {
      92             :    public:
      93             :     // Returns true if the string is flat and this structure contains content.
      94             :     bool IsFlat() const { return state_ != NON_FLAT; }
      95             :     // Returns true if the structure contains one-byte content.
      96     1606530 :     bool IsOneByte() const { return state_ == ONE_BYTE; }
      97             :     // Returns true if the structure contains two-byte content.
      98             :     bool IsTwoByte() const { return state_ == TWO_BYTE; }
      99             : 
     100             :     // Return the one byte content of the string. Only use if IsOneByte()
     101             :     // returns true.
     102     1440376 :     Vector<const uint8_t> ToOneByteVector() const {
     103             :       DCHECK_EQ(ONE_BYTE, state_);
     104    30697501 :       return Vector<const uint8_t>(onebyte_start, length_);
     105             :     }
     106             :     // Return the two-byte content of the string. Only use if IsTwoByte()
     107             :     // returns true.
     108      860966 :     Vector<const uc16> ToUC16Vector() const {
     109             :       DCHECK_EQ(TWO_BYTE, state_);
     110     3241197 :       return Vector<const uc16>(twobyte_start, length_);
     111             :     }
     112             : 
     113             :     uc16 Get(int i) const {
     114             :       DCHECK(i < length_);
     115             :       DCHECK(state_ != NON_FLAT);
     116    62283282 :       if (state_ == ONE_BYTE) return onebyte_start[i];
     117    15287507 :       return twobyte_start[i];
     118             :     }
     119             : 
     120             :     bool UsesSameString(const FlatContent& other) const {
     121             :       return onebyte_start == other.onebyte_start;
     122             :     }
     123             : 
     124             :    private:
     125             :     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
     126             : 
     127             :     // Constructors only used by String::GetFlatContent().
     128             :     explicit FlatContent(const uint8_t* start, int length)
     129             :         : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
     130             :     explicit FlatContent(const uc16* start, int length)
     131             :         : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
     132             :     FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
     133             : 
     134             :     union {
     135             :       const uint8_t* onebyte_start;
     136             :       const uc16* twobyte_start;
     137             :     };
     138             :     int length_;
     139             :     State state_;
     140             : 
     141             :     friend class String;
     142             :     friend class IterableSubString;
     143             :   };
     144             : 
     145             :   template <typename Char>
     146             :   V8_INLINE Vector<const Char> GetCharVector(
     147             :       const DisallowHeapAllocation& no_gc);
     148             : 
     149             :   // Get and set the length of the string.
     150             :   inline int length() const;
     151             :   inline void set_length(int value);
     152             : 
     153             :   // Get and set the length of the string using acquire loads and release
     154             :   // stores.
     155             :   inline int synchronized_length() const;
     156             :   inline void synchronized_set_length(int value);
     157             : 
     158             :   // Returns whether this string has only one-byte chars, i.e. all of them can
     159             :   // be one-byte encoded.  This might be the case even if the string is
     160             :   // two-byte.  Such strings may appear when the embedder prefers
     161             :   // two-byte external representations even for one-byte data.
     162             :   inline bool IsOneByteRepresentation() const;
     163             :   inline bool IsTwoByteRepresentation() const;
     164             : 
     165             :   // Cons and slices have an encoding flag that may not represent the actual
     166             :   // encoding of the underlying string.  This is taken into account here.
     167             :   // This function is static because that helps it get inlined.
     168             :   // Requires: string.IsFlat()
     169             :   static inline bool IsOneByteRepresentationUnderneath(String string);
     170             : 
     171             :   // NOTE: this should be considered only a hint.  False negatives are
     172             :   // possible.
     173             :   inline bool HasOnlyOneByteChars();
     174             : 
     175             :   // Get and set individual two byte chars in the string.
     176             :   inline void Set(int index, uint16_t value);
     177             :   // Get individual two byte char in the string.  Repeated calls
     178             :   // to this method are not efficient unless the string is flat.
     179             :   V8_INLINE uint16_t Get(int index);
     180             : 
     181             :   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
     182             :   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
     183             : 
     184             :   // Flattens the string.  Checks first inline to see if it is
     185             :   // necessary.  Does nothing if the string is not a cons string.
     186             :   // Flattening allocates a sequential string with the same data as
     187             :   // the given string and mutates the cons string to a degenerate
     188             :   // form, where the first component is the new sequential string and
     189             :   // the second component is the empty string.  If allocation fails,
     190             :   // this function returns a failure.  If flattening succeeds, this
     191             :   // function returns the sequential string that is now the first
     192             :   // component of the cons string.
     193             :   //
     194             :   // Degenerate cons strings are handled specially by the garbage
     195             :   // collector (see IsShortcutCandidate).
     196             : 
     197             :   static inline Handle<String> Flatten(Isolate* isolate, Handle<String> string,
     198             :                                        PretenureFlag pretenure = NOT_TENURED);
     199             : 
     200             :   // Tries to return the content of a flat string as a structure holding either
     201             :   // a flat vector of char or of uc16.
     202             :   // If the string isn't flat, and therefore doesn't have flat content, the
     203             :   // returned structure will report so, and can't provide a vector of either
     204             :   // kind.
     205             :   FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc);
     206             : 
     207             :   // Returns the parent of a sliced string or first part of a flat cons string.
     208             :   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
     209             :   inline String GetUnderlying();
     210             : 
     211             :   // String relational comparison, implemented according to ES6 section 7.2.11
     212             :   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
     213             :   // simple lexicographic ordering on sequences of code unit values. There is no
     214             :   // attempt to use the more complex, semantically oriented definitions of
     215             :   // character or string equality and collating order defined in the Unicode
     216             :   // specification. Therefore String values that are canonically equal according
     217             :   // to the Unicode standard could test as unequal. In effect this algorithm
     218             :   // assumes that both Strings are already in normalized form. Also, note that
     219             :   // for strings containing supplementary characters, lexicographic ordering on
     220             :   // sequences of UTF-16 code unit values differs from that on sequences of code
     221             :   // point values.
     222             :   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
     223             :                                                         Handle<String> x,
     224             :                                                         Handle<String> y);
     225             : 
     226             :   // Perform ES6 21.1.3.8, including checking arguments.
     227             :   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
     228             :                         Handle<Object> search, Handle<Object> position);
     229             :   // Perform string match of pattern on subject, starting at start index.
     230             :   // Caller must ensure that 0 <= start_index <= receiver->length(), as this
     231             :   // does not check any arguments.
     232             :   static int IndexOf(Isolate* isolate, Handle<String> receiver,
     233             :                      Handle<String> search, int start_index);
     234             : 
     235             :   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
     236             :                             Handle<Object> search, Handle<Object> position);
     237             : 
     238             :   // Encapsulates logic related to a match and its capture groups as required
     239             :   // by GetSubstitution.
     240        3894 :   class Match {
     241             :    public:
     242             :     virtual Handle<String> GetMatch() = 0;
     243             :     virtual Handle<String> GetPrefix() = 0;
     244             :     virtual Handle<String> GetSuffix() = 0;
     245             : 
     246             :     // A named capture can be invalid (if it is not specified in the pattern),
     247             :     // unmatched (specified but not matched in the current string), and matched.
     248             :     enum CaptureState { INVALID, UNMATCHED, MATCHED };
     249             : 
     250             :     virtual int CaptureCount() = 0;
     251             :     virtual bool HasNamedCaptures() = 0;
     252             :     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
     253             :     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
     254             :                                                 CaptureState* state) = 0;
     255             : 
     256        3894 :     virtual ~Match() = default;
     257             :   };
     258             : 
     259             :   // ES#sec-getsubstitution
     260             :   // GetSubstitution(matched, str, position, captures, replacement)
     261             :   // Expand the $-expressions in the string and return a new string with
     262             :   // the result.
     263             :   // A {start_index} can be passed to specify where to start scanning the
     264             :   // replacement string.
     265             :   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
     266             :       Isolate* isolate, Match* match, Handle<String> replacement,
     267             :       int start_index = 0);
     268             : 
     269             :   // String equality operations.
     270             :   inline bool Equals(String other);
     271             :   inline static bool Equals(Isolate* isolate, Handle<String> one,
     272             :                             Handle<String> two);
     273             :   bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
     274             : 
     275             :   // Dispatches to Is{One,Two}ByteEqualTo.
     276             :   template <typename Char>
     277             :   bool IsEqualTo(Vector<const Char> str);
     278             : 
     279             :   bool IsOneByteEqualTo(Vector<const uint8_t> str);
     280             :   bool IsTwoByteEqualTo(Vector<const uc16> str);
     281             : 
     282             :   // Return a UTF8 representation of the string.  The string is null
     283             :   // terminated but may optionally contain nulls.  Length is returned
     284             :   // in length_output if length_output is not a null pointer.  The string
     285             :   // should be nearly flat, otherwise the performance of this method may
     286             :   // be very slow (quadratic in the length).  Setting robustness_flag to
     287             :   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
     288             :   // handles unexpected data without causing assert failures and it does not
     289             :   // do any heap allocations.  This is useful when printing stack traces.
     290             :   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
     291             :                                     RobustnessFlag robustness_flag, int offset,
     292             :                                     int length, int* length_output = nullptr);
     293             :   std::unique_ptr<char[]> ToCString(
     294             :       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
     295             :       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
     296             :       int* length_output = nullptr);
     297             : 
     298             :   bool ComputeArrayIndex(uint32_t* index);
     299             : 
     300             :   // Externalization.
     301             :   bool MakeExternal(v8::String::ExternalStringResource* resource);
     302             :   bool MakeExternal(v8::String::ExternalOneByteStringResource* resource);
     303             :   bool SupportsExternalization();
     304             : 
     305             :   // Conversion.
     306             :   inline bool AsArrayIndex(uint32_t* index);
     307             :   uint32_t inline ToValidIndex(Object number);
     308             : 
     309             :   // Trimming.
     310             :   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
     311             :   static Handle<String> Trim(Isolate* isolate, Handle<String> string,
     312             :                              TrimMode mode);
     313             : 
     314             :   DECL_CAST(String)
     315             : 
     316             :   void PrintOn(FILE* out);
     317             : 
     318             :   // For use during stack traces.  Performs rudimentary sanity check.
     319             :   bool LooksValid();
     320             : 
     321             :   // Dispatched behavior.
     322             :   void StringShortPrint(StringStream* accumulator, bool show_details = true);
     323             :   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
     324             : #if defined(DEBUG) || defined(OBJECT_PRINT)
     325             :   char* ToAsciiArray();
     326             : #endif
     327             :   DECL_PRINTER(String)
     328             :   DECL_VERIFIER(String)
     329             : 
     330             :   inline bool IsFlat();
     331             : 
     332             :   // Layout description.
     333             :   static const int kLengthOffset = Name::kHeaderSize;
     334             :   static const int kHeaderSize = kLengthOffset + kInt32Size;
     335             : 
     336             :   // Max char codes.
     337             :   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
     338             :   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
     339             :   static const int kMaxUtf16CodeUnit = 0xffff;
     340             :   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
     341             :   static const uc32 kMaxCodePoint = 0x10ffff;
     342             : 
     343             :   // Maximal string length.
     344             :   // The max length is different on 32 and 64 bit platforms. Max length for a
     345             :   // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
     346             :   // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
     347             :   // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
     348             :   // each char needs two bytes, subtract 24 bytes for the string header size.
     349             : 
     350             :   // See include/v8.h for the definition.
     351             :   static const int kMaxLength = v8::String::kMaxLength;
     352             :   static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kHeaderSize),
     353             :                 "Unexpected max String length");
     354             : 
     355             :   // Max length for computing hash. For strings longer than this limit the
     356             :   // string length is used as the hash value.
     357             :   static const int kMaxHashCalcLength = 16383;
     358             : 
     359             :   // Limit for truncation in short printing.
     360             :   static const int kMaxShortPrintLength = 1024;
     361             : 
     362             :   // Helper function for flattening strings.
     363             :   template <typename sinkchar>
     364             :   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
     365             : 
     366             :   // The return value may point to the first aligned word containing the first
     367             :   // non-one-byte character, rather than directly to the non-one-byte character.
     368             :   // If the return value is >= the passed length, the entire string was
     369             :   // one-byte.
     370    10164602 :   static inline int NonAsciiStart(const char* chars, int length) {
     371             :     const char* start = chars;
     372    10164602 :     const char* limit = chars + length;
     373             : 
     374    10164602 :     if (length >= kIntptrSize) {
     375             :       // Check unaligned bytes.
     376     5300938 :       while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
     377     3724785 :         if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     378         286 :           return static_cast<int>(chars - start);
     379             :         }
     380     3724499 :         ++chars;
     381             :       }
     382             :       // Check aligned words.
     383             :       DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
     384             :       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
     385   874769382 :       while (chars + sizeof(uintptr_t) <= limit) {
     386   873194801 :         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
     387        1572 :           return static_cast<int>(chars - start);
     388             :         }
     389             :         chars += sizeof(uintptr_t);
     390             :       }
     391             :     }
     392             :     // Check remaining unaligned bytes.
     393    45515989 :     while (chars < limit) {
     394    35357217 :       if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     395        3972 :         return static_cast<int>(chars - start);
     396             :       }
     397    35353245 :       ++chars;
     398             :     }
     399             : 
     400    10158772 :     return static_cast<int>(chars - start);
     401             :   }
     402             : 
     403             :   static inline bool IsAscii(const char* chars, int length) {
     404       10875 :     return NonAsciiStart(chars, length) >= length;
     405             :   }
     406             : 
     407             :   static inline bool IsAscii(const uint8_t* chars, int length) {
     408           0 :     return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
     409             :            length;
     410             :   }
     411             : 
     412             :   static inline int NonOneByteStart(const uc16* chars, int length) {
     413     1879730 :     const uc16* limit = chars + length;
     414             :     const uc16* start = chars;
     415  1499402760 :     while (chars < limit) {
     416  1498139846 :       if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
     417  1497523030 :       ++chars;
     418             :     }
     419     1262914 :     return static_cast<int>(chars - start);
     420             :   }
     421             : 
     422             :   static inline bool IsOneByte(const uc16* chars, int length) {
     423             :     return NonOneByteStart(chars, length) >= length;
     424             :   }
     425             : 
     426             :   template <class Visitor>
     427             :   static inline ConsString VisitFlat(Visitor* visitor, String string,
     428             :                                      int offset = 0);
     429             : 
     430             :   static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
     431             :                                               Handle<String> string,
     432             :                                               bool include_ending_line);
     433             : 
     434             :  private:
     435             :   friend class Name;
     436             :   friend class StringTableInsertionKey;
     437             :   friend class InternalizedStringKey;
     438             : 
     439             :   static Handle<String> SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
     440             :                                     PretenureFlag tenure);
     441             : 
     442             :   // Slow case of String::Equals.  This implementation works on any strings
     443             :   // but it is most efficient on strings that are almost flat.
     444             :   bool SlowEquals(String other);
     445             : 
     446             :   static bool SlowEquals(Isolate* isolate, Handle<String> one,
     447             :                          Handle<String> two);
     448             : 
     449             :   // Slow case of AsArrayIndex.
     450             :   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
     451             : 
     452             :   // Compute and set the hash code.
     453             :   uint32_t ComputeAndSetHash(Isolate* isolate);
     454             : 
     455   109778593 :   OBJECT_CONSTRUCTORS(String, Name);
     456             : };
     457             : 
     458             : class SubStringRange {
     459             :  public:
     460             :   inline SubStringRange(String string, const DisallowHeapAllocation& no_gc,
     461             :                         int first = 0, int length = -1);
     462             :   class iterator;
     463             :   inline iterator begin();
     464             :   inline iterator end();
     465             : 
     466             :  private:
     467             :   String string_;
     468             :   int first_;
     469             :   int length_;
     470             :   const DisallowHeapAllocation& no_gc_;
     471             : };
     472             : 
     473             : // The SeqString abstract class captures sequential string values.
     474             : class SeqString : public String {
     475             :  public:
     476             :   DECL_CAST(SeqString)
     477             : 
     478             :   // Truncate the string in-place if possible and return the result.
     479             :   // In case of new_length == 0, the empty string is returned without
     480             :   // truncating the original string.
     481             :   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
     482             :                                                        int new_length);
     483             : 
     484             :   OBJECT_CONSTRUCTORS(SeqString, String);
     485             : };
     486             : 
     487             : class InternalizedString : public String {
     488             :  public:
     489             :   DECL_CAST(InternalizedString)
     490             :   // TODO(neis): Possibly move some stuff from String here.
     491             : 
     492             :   OBJECT_CONSTRUCTORS(InternalizedString, String);
     493             : };
     494             : 
     495             : // The SeqOneByteString class captures sequential one-byte string objects.
     496             : // Each character in the SeqOneByteString is a one-byte character.
     497             : class SeqOneByteString : public SeqString {
     498             :  public:
     499             :   static const bool kHasOneByteEncoding = true;
     500             : 
     501             :   // Dispatched behavior.
     502             :   inline uint16_t SeqOneByteStringGet(int index);
     503             :   inline void SeqOneByteStringSet(int index, uint16_t value);
     504             : 
     505             :   // Get the address of the characters in this string.
     506             :   inline Address GetCharsAddress();
     507             : 
     508             :   inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
     509             : 
     510             :   // Clear uninitialized padding space. This ensures that the snapshot content
     511             :   // is deterministic.
     512             :   void clear_padding();
     513             : 
     514             :   DECL_CAST(SeqOneByteString)
     515             : 
     516             :   // Garbage collection support.  This method is called by the
     517             :   // garbage collector to compute the actual size of a OneByteString
     518             :   // instance.
     519             :   inline int SeqOneByteStringSize(InstanceType instance_type);
     520             : 
     521             :   // Computes the size for a OneByteString instance of a given length.
     522             :   static int SizeFor(int length) {
     523   422684054 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
     524             :   }
     525             : 
     526             :   // Maximal memory usage for a single sequential one-byte string.
     527             :   static const int kMaxCharsSize = kMaxLength;
     528             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     529             :   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
     530             : 
     531             :   class BodyDescriptor;
     532             : 
     533  9480967857 :   OBJECT_CONSTRUCTORS(SeqOneByteString, SeqString);
     534             : };
     535             : 
     536             : // The TwoByteString class captures sequential unicode string objects.
     537             : // Each character in the TwoByteString is a two-byte uint16_t.
     538             : class SeqTwoByteString : public SeqString {
     539             :  public:
     540             :   static const bool kHasOneByteEncoding = false;
     541             : 
     542             :   // Dispatched behavior.
     543             :   inline uint16_t SeqTwoByteStringGet(int index);
     544             :   inline void SeqTwoByteStringSet(int index, uint16_t value);
     545             : 
     546             :   // Get the address of the characters in this string.
     547             :   inline Address GetCharsAddress();
     548             : 
     549             :   inline uc16* GetChars(const DisallowHeapAllocation& no_gc);
     550             : 
     551             :   // Clear uninitialized padding space. This ensures that the snapshot content
     552             :   // is deterministic.
     553             :   void clear_padding();
     554             : 
     555             :   DECL_CAST(SeqTwoByteString)
     556             : 
     557             :   // Garbage collection support.  This method is called by the
     558             :   // garbage collector to compute the actual size of a TwoByteString
     559             :   // instance.
     560             :   inline int SeqTwoByteStringSize(InstanceType instance_type);
     561             : 
     562             :   // Computes the size for a TwoByteString instance of a given length.
     563             :   static int SizeFor(int length) {
     564   226580474 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
     565             :   }
     566             : 
     567             :   // Maximal memory usage for a single sequential two-byte string.
     568             :   static const int kMaxCharsSize = kMaxLength * 2;
     569             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     570             :   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
     571             :                 String::kMaxLength);
     572             : 
     573             :   class BodyDescriptor;
     574             : 
     575   227062933 :   OBJECT_CONSTRUCTORS(SeqTwoByteString, SeqString);
     576             : };
     577             : 
     578             : // The ConsString class describes string values built by using the
     579             : // addition operator on strings.  A ConsString is a pair where the
     580             : // first and second components are pointers to other string values.
     581             : // One or both components of a ConsString can be pointers to other
     582             : // ConsStrings, creating a binary tree of ConsStrings where the leaves
     583             : // are non-ConsString string values.  The string value represented by
     584             : // a ConsString can be obtained by concatenating the leaf string
     585             : // values in a left-to-right depth-first traversal of the tree.
     586             : class ConsString : public String {
     587             :  public:
     588             :   // First string of the cons cell.
     589             :   inline String first();
     590             :   // Doesn't check that the result is a string, even in debug mode.  This is
     591             :   // useful during GC where the mark bits confuse the checks.
     592             :   inline Object unchecked_first();
     593             :   inline void set_first(Isolate* isolate, String first,
     594             :                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     595             : 
     596             :   // Second string of the cons cell.
     597             :   inline String second();
     598             :   // Doesn't check that the result is a string, even in debug mode.  This is
     599             :   // useful during GC where the mark bits confuse the checks.
     600             :   inline Object unchecked_second();
     601             :   inline void set_second(Isolate* isolate, String second,
     602             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     603             : 
     604             :   // Dispatched behavior.
     605             :   V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
     606             : 
     607             :   DECL_CAST(ConsString)
     608             : 
     609             :   // Layout description.
     610             : #define CONS_STRING_FIELDS(V)   \
     611             :   V(kFirstOffset, kTaggedSize)  \
     612             :   V(kSecondOffset, kTaggedSize) \
     613             :   /* Total size. */             \
     614             :   V(kSize, 0)
     615             : 
     616             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, CONS_STRING_FIELDS)
     617             : #undef CONS_STRING_FIELDS
     618             : 
     619             :   // Minimum length for a cons string.
     620             :   static const int kMinLength = 13;
     621             : 
     622             :   typedef FixedBodyDescriptor<kFirstOffset, kSize, kSize> BodyDescriptor;
     623             : 
     624             :   DECL_VERIFIER(ConsString)
     625             : 
     626      273526 :   OBJECT_CONSTRUCTORS(ConsString, String);
     627             : };
     628             : 
     629             : // The ThinString class describes string objects that are just references
     630             : // to another string object. They are used for in-place internalization when
     631             : // the original string cannot actually be internalized in-place: in these
     632             : // cases, the original string is converted to a ThinString pointing at its
     633             : // internalized version (which is allocated as a new object).
     634             : // In terms of memory layout and most algorithms operating on strings,
     635             : // ThinStrings can be thought of as "one-part cons strings".
     636             : class ThinString : public String {
     637             :  public:
     638             :   // Actual string that this ThinString refers to.
     639             :   inline String actual() const;
     640             :   inline HeapObject unchecked_actual() const;
     641             :   inline void set_actual(String s,
     642             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     643             : 
     644             :   V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
     645             : 
     646             :   DECL_CAST(ThinString)
     647             :   DECL_VERIFIER(ThinString)
     648             : 
     649             :   // Layout description.
     650             : #define THIN_STRING_FIELDS(V)   \
     651             :   V(kActualOffset, kTaggedSize) \
     652             :   /* Total size. */             \
     653             :   V(kSize, 0)
     654             : 
     655             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, THIN_STRING_FIELDS)
     656             : #undef THIN_STRING_FIELDS
     657             : 
     658             :   typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor;
     659             : 
     660        2624 :   OBJECT_CONSTRUCTORS(ThinString, String);
     661             : };
     662             : 
     663             : // The Sliced String class describes strings that are substrings of another
     664             : // sequential string.  The motivation is to save time and memory when creating
     665             : // a substring.  A Sliced String is described as a pointer to the parent,
     666             : // the offset from the start of the parent string and the length.  Using
     667             : // a Sliced String therefore requires unpacking of the parent string and
     668             : // adding the offset to the start address.  A substring of a Sliced String
     669             : // are not nested since the double indirection is simplified when creating
     670             : // is not nested since the double indirection is simplified when creating
     671             : // Currently missing features are:
     672             : //  - handling externalized parent strings
     673             : //  - external strings as parent
     674             : //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
     675             : class SlicedString : public String {
     676             :  public:
     677             :   inline String parent();
     678             :   inline void set_parent(Isolate* isolate, String parent,
     679             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     680             :   inline int offset() const;
     681             :   inline void set_offset(int offset);
     682             : 
     683             :   // Dispatched behavior.
     684             :   V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
     685             : 
     686             :   DECL_CAST(SlicedString)
     687             : 
     688             :   // Layout description.
     689             : #define SLICED_STRING_FIELDS(V) \
     690             :   V(kParentOffset, kTaggedSize) \
     691             :   V(kOffsetOffset, kTaggedSize) \
     692             :   /* Total size. */             \
     693             :   V(kSize, 0)
     694             : 
     695             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, SLICED_STRING_FIELDS)
     696             : #undef SLICED_STRING_FIELDS
     697             : 
     698             :   // Minimum length for a sliced string.
     699             :   static const int kMinLength = 13;
     700             : 
     701             :   typedef FixedBodyDescriptor<kParentOffset, kSize, kSize> BodyDescriptor;
     702             : 
     703             :   DECL_VERIFIER(SlicedString)
     704             : 
     705     1095402 :   OBJECT_CONSTRUCTORS(SlicedString, String);
     706             : };
     707             : 
     708             : // The ExternalString class describes string values that are backed by
     709             : // a string resource that lies outside the V8 heap.  ExternalStrings
     710             : // consist of the length field common to all strings and a pointer to the
     711             : // external resource.  It is important to ensure (externally) that the
     712             : // resource is not deallocated while the ExternalString is live in the
     713             : // V8 heap.
     714             : //
     715             : // The API expects that all ExternalStrings are created through the
     716             : // API.  Therefore, ExternalStrings should not be used internally.
     717             : class ExternalString : public String {
     718             :  public:
     719             :   DECL_CAST(ExternalString)
     720             : 
     721             :   // Layout description.
     722             : #define EXTERNAL_STRING_FIELDS(V)            \
     723             :   V(kResourceOffset, kSystemPointerSize)     \
     724             :   /* Size of uncached external strings. */   \
     725             :   V(kUncachedSize, 0)                        \
     726             :   V(kResourceDataOffset, kSystemPointerSize) \
     727             :   /* Total size. */                          \
     728             :   V(kSize, 0)
     729             : 
     730             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, EXTERNAL_STRING_FIELDS)
     731             : #undef EXTERNAL_STRING_FIELDS
     732             : 
     733             :   // Return whether the external string data pointer is not cached.
     734             :   inline bool is_uncached() const;
     735             :   // Size in bytes of the external payload.
     736             :   int ExternalPayloadSize() const;
     737             : 
     738             :   // Used in the serializer/deserializer.
     739             :   inline Address resource_as_address();
     740             :   inline void set_address_as_resource(Address address);
     741             :   inline uint32_t resource_as_uint32();
     742             :   inline void set_uint32_as_resource(uint32_t value);
     743             : 
     744             :   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
     745             : 
     746             :   OBJECT_CONSTRUCTORS(ExternalString, String);
     747             : };
     748             : 
     749             : // The ExternalOneByteString class is an external string backed by a
     750             : // one-byte string.
     751             : class ExternalOneByteString : public ExternalString {
     752             :  public:
     753             :   static const bool kHasOneByteEncoding = true;
     754             : 
     755             :   typedef v8::String::ExternalOneByteStringResource Resource;
     756             : 
     757             :   // The underlying resource.
     758             :   inline const Resource* resource();
     759             : 
     760             :   // It is assumed that the previous resource is null. If it is not null, then
     761             :   // it is the responsibility of the caller to handle the previous resource.
     762             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     763             :   // Used only during serialization.
     764             :   inline void set_resource(const Resource* buffer);
     765             : 
     766             :   // Update the pointer cache to the external character array.
     767             :   // The cached pointer is always valid, as the external character array does
     768             :   // not move during lifetime.  Deserialization is the only exception, after
     769             :   // which the pointer cache has to be refreshed.
     770             :   inline void update_data_cache();
     771             : 
     772             :   inline const uint8_t* GetChars();
     773             : 
     774             :   // Dispatched behavior.
     775             :   inline uint16_t ExternalOneByteStringGet(int index);
     776             : 
     777             :   DECL_CAST(ExternalOneByteString)
     778             : 
     779             :   class BodyDescriptor;
     780             : 
     781     8882173 :   OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
     782             : };
     783             : 
     784             : // The ExternalTwoByteString class is an external string backed by a UTF-16
     785             : // encoded string.
     786             : class ExternalTwoByteString : public ExternalString {
     787             :  public:
     788             :   static const bool kHasOneByteEncoding = false;
     789             : 
     790             :   typedef v8::String::ExternalStringResource Resource;
     791             : 
     792             :   // The underlying string resource.
     793             :   inline const Resource* resource();
     794             : 
     795             :   // It is assumed that the previous resource is null. If it is not null, then
     796             :   // it is the responsibility of the caller to handle the previous resource.
     797             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     798             :   // Used only during serialization.
     799             :   inline void set_resource(const Resource* buffer);
     800             : 
     801             :   // Update the pointer cache to the external character array.
     802             :   // The cached pointer is always valid, as the external character array does
     803             :   // not move during lifetime.  Deserialization is the only exception, after
     804             :   // which the pointer cache has to be refreshed.
     805             :   inline void update_data_cache();
     806             : 
     807             :   inline const uint16_t* GetChars();
     808             : 
     809             :   // Dispatched behavior.
     810             :   inline uint16_t ExternalTwoByteStringGet(int index);
     811             : 
     812             :   // For regexp code.
     813             :   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
     814             : 
     815             :   DECL_CAST(ExternalTwoByteString)
     816             : 
     817             :   class BodyDescriptor;
     818             : 
     819       54823 :   OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
     820             : };
     821             : 
     822             : // A flat string reader provides random access to the contents of a
     823             : // string independent of the character width of the string.  The handle
     824             : // must be valid as long as the reader is being used.
     825     2808790 : class FlatStringReader : public Relocatable {
     826             :  public:
     827             :   FlatStringReader(Isolate* isolate, Handle<String> str);
     828             :   FlatStringReader(Isolate* isolate, Vector<const char> input);
     829             :   void PostGarbageCollection() override;
     830             :   inline uc32 Get(int index);
     831             :   template <typename Char>
     832             :   inline Char Get(int index);
     833  2443120408 :   int length() { return length_; }
     834             : 
     835             :  private:
     836             :   Address* str_;
     837             :   bool is_one_byte_;
     838             :   int length_;
     839             :   const void* start_;
     840             : };
     841             : 
     842             : // This maintains an off-stack representation of the stack frames required
     843             : // to traverse a ConsString, allowing an entirely iterative and restartable
     844             : // traversal of the entire string.
     845             : class ConsStringIterator {
     846             :  public:
     847   209707767 :   inline ConsStringIterator() = default;
     848     2610072 :   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
     849             :     Reset(cons_string, offset);
     850        1720 :   }
     851             :   inline void Reset(ConsString cons_string, int offset = 0) {
     852    11644471 :     depth_ = 0;
     853             :     // Next will always return a default-constructed String().
     854    11608837 :     if (cons_string.is_null()) return;
     855       42896 :     Initialize(cons_string, offset);
     856             :   }
     857             :   // Returns a default-constructed String() when complete.
     858             :   inline String Next(int* offset_out) {
     859    21018702 :     *offset_out = 0;
     860    21018702 :     if (depth_ == 0) return String();
     861    10637895 :     return Continue(offset_out);
     862             :   }
     863             : 
     864             :  private:
     865             :   static const int kStackSize = 32;
     866             :   // Use a mask instead of doing modulo operations for stack wrapping.
     867             :   static const int kDepthMask = kStackSize - 1;
     868             :   static_assert(base::bits::IsPowerOfTwo(kStackSize),
     869             :                 "kStackSize must be power of two");
     870             :   static inline int OffsetForDepth(int depth);
     871             : 
     872             :   inline void PushLeft(ConsString string);
     873             :   inline void PushRight(ConsString string);
     874             :   inline void AdjustMaximumDepth();
     875             :   inline void Pop();
     876    21495955 :   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
     877             :   void Initialize(ConsString cons_string, int offset);
     878             :   String Continue(int* offset_out);
     879             :   String NextLeaf(bool* blew_stack);
     880             :   String Search(int* offset_out);
     881             : 
     882             :   // Stack must always contain only frames for which right traversal
     883             :   // has not yet been performed.
     884             :   ConsString frames_[kStackSize];
     885             :   ConsString root_;
     886             :   int depth_;
     887             :   int maximum_depth_;
     888             :   int consumed_;
     889             :   DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
     890             : };
     891             : 
     892             : class StringCharacterStream {
     893             :  public:
     894             :   inline explicit StringCharacterStream(String string, int offset = 0);
     895             :   inline uint16_t GetNext();
     896             :   inline bool HasMore();
     897             :   inline void Reset(String string, int offset = 0);
     898             :   inline void VisitOneByteString(const uint8_t* chars, int length);
     899             :   inline void VisitTwoByteString(const uint16_t* chars, int length);
     900             : 
     901             :  private:
     902             :   ConsStringIterator iter_;
     903             :   bool is_one_byte_;
     904             :   union {
     905             :     const uint8_t* buffer8_;
     906             :     const uint16_t* buffer16_;
     907             :   };
     908             :   const uint8_t* end_;
     909             :   DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
     910             : };
     911             : 
     912             : }  // namespace internal
     913             : }  // namespace v8
     914             : 
     915             : #include "src/objects/object-macros-undef.h"
     916             : 
     917             : #endif  // V8_OBJECTS_STRING_H_

Generated by: LCOV version 1.10