LCOV - code coverage report
Current view: top level - src/objects - string.h (source / functions) Hit Total Coverage
Test: app.info Lines: 42 43 97.7 %
Date: 2017-10-20 Functions: 12 17 70.6 %

          Line data    Source code
       1             : // Copyright 2017 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_OBJECTS_STRING_H_
       6             : #define V8_OBJECTS_STRING_H_
       7             : 
       8             : #include "src/base/bits.h"
       9             : #include "src/objects/name.h"
      10             : 
      11             : // Has to be the last include (doesn't have include guards):
      12             : #include "src/objects/object-macros.h"
      13             : 
      14             : namespace v8 {
      15             : namespace internal {
      16             : 
      17             : class BigInt;
      18             : 
      19             : enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };  // Parameter type of String::ToCString.
      20             : enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };  // Parameter type of String::ToCString.
      21             : 
      22             : // The characteristics of a string are stored in its map.  Retrieving these
      23             : // few bits of information is moderately expensive, involving two memory
      24             : // loads where the second is dependent on the first.  To improve efficiency
      25             : // the shape of the string is given its own class so that it can be retrieved
      26             : // once and used for several string operations.  A StringShape is small enough
      27             : // to be passed by value and is immutable, but be aware that flattening a
      28             : // string can potentially alter its shape.  Also be aware that a GC caused by
      29             : // something else can alter the shape of a string due to ConsString
      30             : // shortcutting.  Keeping these restrictions in mind has proven to be error-
      31             : // prone and so we no longer put StringShapes in variables unless there is a
      32             : // concrete performance benefit at that particular point in the code.
      33             : class StringShape BASE_EMBEDDED {
      34             :  public:
      35             :   inline explicit StringShape(const String* s);
      36             :   inline explicit StringShape(Map* s);
      37             :   inline explicit StringShape(InstanceType t);
      38             :   inline bool IsSequential();
      39             :   inline bool IsExternal();
      40             :   inline bool IsCons();
      41             :   inline bool IsSliced();
      42             :   inline bool IsThin();
      43             :   inline bool IsIndirect();
      44             :   inline bool IsExternalOneByte();
      45             :   inline bool IsExternalTwoByte();
      46             :   inline bool IsSequentialOneByte();
      47             :   inline bool IsSequentialTwoByte();
      48             :   inline bool IsInternalized();
      49             :   inline StringRepresentationTag representation_tag();
      50             :   inline uint32_t encoding_tag();
      51             :   inline uint32_t full_representation_tag();
      52             :   inline bool HasOnlyOneByteChars();
      53             : #ifdef DEBUG
      54             :   inline uint32_t type() { return type_; }
      55             :   inline void invalidate() { valid_ = false; }
      56             :   inline bool valid() { return valid_; }
      57             : #else  // !DEBUG
      58             :   inline void invalidate() {}  // No-op: the valid_ flag only exists in DEBUG builds.
      59             : #endif  // DEBUG
      60             : 
      61             :  private:
      62             :   uint32_t type_;
      63             : #ifdef DEBUG
      64             :   inline void set_valid() { valid_ = true; }
      65             :   bool valid_;  // Set by set_valid(), cleared by invalidate(), read by valid().
      66             : #else  // !DEBUG
      67             :   inline void set_valid() {}  // No-op: the valid_ flag only exists in DEBUG builds.
      68             : #endif  // DEBUG
      69             : };
      70             : 
      71             : // The String abstract class captures JavaScript string values:
      72             : //
      73             : // Ecma-262:
      74             : //  4.3.16 String Value
      75             : //    A string value is a member of the type String and is a finite
      76             : //    ordered sequence of zero or more 16-bit unsigned integer values.
      77             : //
      78             : // All string values have a length field.
      79             : class String : public Name {
      80             :  public:
      81             :   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
      82             : 
      83             :   class SubStringRange {
      84             :    public:
      85             :     explicit inline SubStringRange(String* string, int first = 0,
      86             :                                    int length = -1);
      87             :     class iterator;
      88             :     inline iterator begin();
      89             :     inline iterator end();
      90             : 
      91             :    private:
      92             :     String* string_;
      93             :     int first_;
      94             :     int length_;
      95             :   };
      96             : 
      97             :   // Representation of the flat content of a String.
      98             :   // A non-flat string doesn't have flat content.
      99             :   // A flat string has content that's encoded as a sequence of either
     100             :   // one-byte chars or two-byte UC16.
     101             :   // Returned by String::GetFlatContent().
     102             :   class FlatContent {
     103             :    public:
     104             :     // Returns true if the string is flat and this structure contains content.
     105             :     bool IsFlat() const { return state_ != NON_FLAT; }
     106             :     // Returns true if the structure contains one-byte content.
     107     1528327 :     bool IsOneByte() const { return state_ == ONE_BYTE; }
     108             :     // Returns true if the structure contains two-byte content.
     109             :     bool IsTwoByte() const { return state_ == TWO_BYTE; }
     110             : 
     111             :     // Return the one byte content of the string. Only use if IsOneByte()
     112             :     // returns true.
     113     1384561 :     Vector<const uint8_t> ToOneByteVector() const {
     114             :       DCHECK_EQ(ONE_BYTE, state_);
     115     5274823 :       return Vector<const uint8_t>(onebyte_start, length_);
     116             :     }
     117             :     // Return the two-byte content of the string. Only use if IsTwoByte()
     118             :     // returns true.
     119      903271 :     Vector<const uc16> ToUC16Vector() const {
     120             :       DCHECK_EQ(TWO_BYTE, state_);
     121     3808536 :       return Vector<const uc16>(twobyte_start, length_);
     122             :     }
     123             : 
     124        1260 :     uc16 Get(int i) const {
     125             :       DCHECK(i < length_);  // NOTE(review): only the upper bound is checked; i >= 0 is not.
     126             :       DCHECK(state_ != NON_FLAT);
     127    83481841 :       if (state_ == ONE_BYTE) return onebyte_start[i];
     128    26025135 :       return twobyte_start[i];
     129             :     }
     130             : 
     131             :     bool UsesSameString(const FlatContent& other) const {
     132             :       return onebyte_start == other.onebyte_start;  // Shares a union with twobyte_start, so this compares the start pointer for either encoding.
     133             :     }
     134             : 
     135             :    private:
     136             :     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
     137             : 
     138             :     // Constructors only used by String::GetFlatContent().
     139             :     explicit FlatContent(const uint8_t* start, int length)
     140             :         : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
     141             :     explicit FlatContent(const uc16* start, int length)
     142             :         : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
     143             :     FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
     144             : 
     145             :     union {
     146             :       const uint8_t* onebyte_start;
     147             :       const uc16* twobyte_start;
     148             :     };
     149             :     int length_;
     150             :     State state_;
     151             : 
     152             :     friend class String;
     153             :     friend class IterableSubString;
     154             :   };
     155             : 
     156             :   template <typename Char>
     157             :   INLINE(Vector<const Char> GetCharVector());
     158             : 
     159             :   // Get and set the length of the string.
     160             :   inline int length() const;
     161             :   inline void set_length(int value);
     162             : 
     163             :   // Get and set the length of the string using acquire loads and release
     164             :   // stores.
     165             :   inline int synchronized_length() const;
     166             :   inline void synchronized_set_length(int value);
     167             : 
     168             :   // Returns whether this string has only one-byte chars, i.e. all of them can
     169             :   // be one-byte encoded.  This might be the case even if the string is
     170             :   // two-byte.  Such strings may appear when the embedder prefers
     171             :   // two-byte external representations even for one-byte data.
     172             :   inline bool IsOneByteRepresentation() const;
     173             :   inline bool IsTwoByteRepresentation() const;
     174             : 
     175             :   // Cons and slices have an encoding flag that may not represent the actual
     176             :   // encoding of the underlying string.  This is taken into account here.
     177             :   // Requires: this->IsFlat()
     178             :   inline bool IsOneByteRepresentationUnderneath();
     179             :   inline bool IsTwoByteRepresentationUnderneath();
     180             : 
     181             :   // NOTE: this should be considered only a hint.  False negatives are
     182             :   // possible.
     183             :   inline bool HasOnlyOneByteChars();
     184             : 
     185             :   // Get and set individual two byte chars in the string.
     186             :   inline void Set(int index, uint16_t value);
     187             :   // Get individual two byte char in the string.  Repeated calls
     188             :   // to this method are not efficient unless the string is flat.
     189             :   INLINE(uint16_t Get(int index));
     190             : 
     191             :   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
     192             :   static Handle<Object> ToNumber(Handle<String> subject);
     193             : 
     194             :   // Flattens the string.  Checks first inline to see if it is
     195             :   // necessary.  Does nothing if the string is not a cons string.
     196             :   // Flattening allocates a sequential string with the same data as
     197             :   // the given string and mutates the cons string to a degenerate
     198             :   // form, where the first component is the new sequential string and
     199             :   // the second component is the empty string.  If allocation fails,
     200             :   // this function returns a failure.  If flattening succeeds, this
     201             :   // function returns the sequential string that is now the first
     202             :   // component of the cons string.
     203             :   //
     204             :   // Degenerate cons strings are handled specially by the garbage
     205             :   // collector (see IsShortcutCandidate).
     206             : 
     207             :   static inline Handle<String> Flatten(Handle<String> string,
     208             :                                        PretenureFlag pretenure = NOT_TENURED);
     209             : 
     210             :   // Tries to return the content of a flat string as a structure holding either
     211             :   // a flat vector of char or of uc16.
     212             :   // If the string isn't flat, and therefore doesn't have flat content, the
     213             :   // returned structure will report so, and can't provide a vector of either
     214             :   // kind.
     215             :   FlatContent GetFlatContent();
     216             : 
     217             :   // Returns the parent of a sliced string or first part of a flat cons string.
     218             :   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
     219             :   inline String* GetUnderlying();
     220             : 
     221             :   // String relational comparison, implemented according to ES6 section 7.2.11
     222             :   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
     223             :   // simple lexicographic ordering on sequences of code unit values. There is no
     224             :   // attempt to use the more complex, semantically oriented definitions of
     225             :   // character or string equality and collating order defined in the Unicode
     226             :   // specification. Therefore String values that are canonically equal according
     227             :   // to the Unicode standard could test as unequal. In effect this algorithm
     228             :   // assumes that both Strings are already in normalized form. Also, note that
     229             :   // for strings containing supplementary characters, lexicographic ordering on
     230             :   // sequences of UTF-16 code unit values differs from that on sequences of code
     231             :   // point values.
     232             :   MUST_USE_RESULT static ComparisonResult Compare(Handle<String> x,
     233             :                                                   Handle<String> y);
     234             : 
     235             :   // Perform ES6 21.1.3.8, including checking arguments.
     236             :   static Object* IndexOf(Isolate* isolate, Handle<Object> receiver,
     237             :                          Handle<Object> search, Handle<Object> position);
     238             :   // Perform string match of pattern on subject, starting at start index.
     239             :   // Caller must ensure that 0 <= start_index <= receiver->length(), as this
     240             :   // does not check any arguments.
     241             :   static int IndexOf(Isolate* isolate, Handle<String> receiver,
     242             :                      Handle<String> search, int start_index);
     243             : 
     244             :   static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver,
     245             :                              Handle<Object> search, Handle<Object> position);
     246             : 
     247             :   // Encapsulates logic related to a match and its capture groups as required
     248             :   // by GetSubstitution.
     249        3976 :   class Match {
     250             :    public:
     251             :     virtual Handle<String> GetMatch() = 0;
     252             :     virtual Handle<String> GetPrefix() = 0;
     253             :     virtual Handle<String> GetSuffix() = 0;
     254             : 
     255             :     // A named capture can be invalid (if it is not specified in the pattern),
     256             :     // unmatched (specified but not matched in the current string), and matched.
     257             :     enum CaptureState { INVALID, UNMATCHED, MATCHED };
     258             : 
     259             :     virtual int CaptureCount() = 0;
     260             :     virtual bool HasNamedCaptures() = 0;
     261             :     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
     262             :     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
     263             :                                                 CaptureState* state) = 0;
     264             : 
     265        3976 :     virtual ~Match() {}
     266             :   };
     267             : 
     268             :   // ES#sec-getsubstitution
     269             :   // GetSubstitution(matched, str, position, captures, replacement)
     270             :   // Expand the $-expressions in the string and return a new string with
     271             :   // the result.
     272             :   // A {start_index} can be passed to specify where to start scanning the
     273             :   // replacement string.
     274             :   MUST_USE_RESULT static MaybeHandle<String> GetSubstitution(
     275             :       Isolate* isolate, Match* match, Handle<String> replacement,
     276             :       int start_index = 0);
     277             : 
     278             :   // String equality operations.
     279             :   inline bool Equals(String* other);
     280             :   inline static bool Equals(Handle<String> one, Handle<String> two);
     281             :   bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
     282             : 
     283             :   // Dispatches to Is{One,Two}ByteEqualTo.
     284             :   template <typename Char>
     285             :   bool IsEqualTo(Vector<const Char> str);
     286             : 
     287             :   bool IsOneByteEqualTo(Vector<const uint8_t> str);
     288             :   bool IsTwoByteEqualTo(Vector<const uc16> str);
     289             : 
     290             :   // Return a UTF8 representation of the string.  The string is null
     291             :   // terminated but may optionally contain nulls.  Length is returned
     292             :   // in length_output if length_output is not a null pointer.  The string
     293             :   // should be nearly flat, otherwise the performance of this method may
     294             :   // be very slow (quadratic in the length).  Setting robustness_flag to
     295             :   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
     296             :   // handles unexpected data without causing assert failures and it does not
     297             :   // do any heap allocations.  This is useful when printing stack traces.
     298             :   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
     299             :                                     RobustnessFlag robustness_flag, int offset,
     300             :                                     int length, int* length_output = 0);  // NOTE(review): null default spelled as 0; nullptr would be clearer.
     301             :   std::unique_ptr<char[]> ToCString(
     302             :       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
     303             :       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
     304             :       int* length_output = 0);  // NOTE(review): null default spelled as 0; nullptr would be clearer.
     305             : 
     306             :   bool ComputeArrayIndex(uint32_t* index);
     307             : 
     308             :   // Externalization.
     309             :   bool MakeExternal(v8::String::ExternalStringResource* resource);
     310             :   bool MakeExternal(v8::String::ExternalOneByteStringResource* resource);
     311             : 
     312             :   // Conversion.
     313             :   inline bool AsArrayIndex(uint32_t* index);
     314             :   uint32_t inline ToValidIndex(Object* number);
     315             : 
     316             :   // Trimming.
     317             :   enum TrimMode { kTrim, kTrimLeft, kTrimRight };
     318             :   static Handle<String> Trim(Handle<String> string, TrimMode mode);
     319             : 
     320             :   DECL_CAST(String)
     321             : 
     322             :   void PrintOn(FILE* out);
     323             : 
     324             :   // For use during stack traces.  Performs rudimentary sanity check.
     325             :   bool LooksValid();
     326             : 
     327             :   // Dispatched behavior.
     328             :   void StringShortPrint(StringStream* accumulator, bool show_details = true);
     329             :   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
     330             : #if defined(DEBUG) || defined(OBJECT_PRINT)
     331             :   char* ToAsciiArray();
     332             : #endif  // defined(DEBUG) || defined(OBJECT_PRINT)
     333             :   DECL_PRINTER(String)
     334             :   DECL_VERIFIER(String)
     335             : 
     336             :   inline bool IsFlat();
     337             : 
     338             :   // Layout description.
     339             :   static const int kLengthOffset = Name::kSize;
     340             :   static const int kSize = kLengthOffset + kPointerSize;
     341             : 
     342             :   // Max char codes.
     343             :   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
     344             :   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
     345             :   static const int kMaxUtf16CodeUnit = 0xffff;
     346             :   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
     347             :   static const uc32 kMaxCodePoint = 0x10ffff;
     348             : 
     349             :   // Maximal string length.
     350             :   // The max length is different on 32 and 64 bit platforms. Max length for a
     351             :   // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
     352             :   // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
     353             :   // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
     354             :   // each char needs two bytes, subtract 24 bytes for the string header size.
     355             : 
     356             :   // See include/v8.h for the definition.
     357             :   static const int kMaxLength = v8::String::kMaxLength;
     358             : 
     359             :   // Max length for computing hash. For strings longer than this limit the
     360             :   // string length is used as the hash value.
     361             :   static const int kMaxHashCalcLength = 16383;
     362             : 
     363             :   // Limit for truncation in short printing.
     364             :   static const int kMaxShortPrintLength = 1024;
     365             : 
     366             :   // Support for regular expressions.
     367             :   const uc16* GetTwoByteData(unsigned start);
     368             : 
     369             :   // Helper function for flattening strings.
     370             :   template <typename sinkchar>
     371             :   static void WriteToFlat(String* source, sinkchar* sink, int from, int to);
     372             : 
     373             :   // The return value may point to the first aligned word containing the first
     374             :   // non-one-byte character, rather than directly to the non-one-byte character.
     375             :   // If the return value is >= the passed length, the entire string was
     376             :   // one-byte.
     377     9721953 :   static inline int NonAsciiStart(const char* chars, int length) {
     378             :     const char* start = chars;
     379     9721953 :     const char* limit = chars + length;
     380             : 
     381     9721953 :     if (length >= kIntptrSize) {
     382             :       // Check unaligned bytes.
     383     4800247 :       while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
     384     3454780 :         if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     385         310 :           return static_cast<int>(chars - start);
     386             :         }
     387     3454470 :         ++chars;
     388             :       }
     389             :       // Check aligned words.
     390             :       DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
     391             :       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;  // 0x80 in every byte, assuming kUintptrAllBitsSet is all ones.
     392   855609207 :       while (chars + sizeof(uintptr_t) <= limit) {
     393   854265039 :         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
     394        1299 :           return static_cast<int>(chars - start);
     395             :         }
     396             :         chars += sizeof(uintptr_t);
     397             :       }
     398             :     }
     399             :     // Check remaining unaligned bytes.
     400    41519960 :     while (chars < limit) {
     401    31803956 :       if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     402        4340 :         return static_cast<int>(chars - start);
     403             :       }
     404    31799616 :       ++chars;
     405             :     }
     406             : 
     407     9716004 :     return static_cast<int>(chars - start);
     408             :   }
     409             : 
     410             :   static inline bool IsAscii(const char* chars, int length) {
     411           5 :     return NonAsciiStart(chars, length) >= length;
     412             :   }
     413             : 
     414             :   static inline bool IsAscii(const uint8_t* chars, int length) {
     415           0 :     return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
     416             :            length;
     417             :   }
     418             : 
     419             :   static inline int NonOneByteStart(const uc16* chars, int length) {
     420     2055416 :     const uc16* limit = chars + length;
     421             :     const uc16* start = chars;
     422  1426985818 :     while (chars < limit) {
     423  1425599141 :       if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
     424  1424930402 :       ++chars;
     425             :     }
     426     1386677 :     return static_cast<int>(chars - start);
     427             :   }
     428             : 
     429             :   static inline bool IsOneByte(const uc16* chars, int length) {
     430             :     return NonOneByteStart(chars, length) >= length;
     431             :   }
     432             : 
     433             :   template <class Visitor>
     434             :   static inline ConsString* VisitFlat(Visitor* visitor, String* string,
     435             :                                       int offset = 0);
     436             : 
     437             :   static Handle<FixedArray> CalculateLineEnds(Handle<String> string,
     438             :                                               bool include_ending_line);
     439             : 
     440             :   // Use the hash field to forward to the canonical internalized string
     441             :   // when deserializing an internalized string.
     442             :   inline void SetForwardedInternalizedString(String* string);
     443             :   inline String* GetForwardedInternalizedString();
     444             : 
     445             :  private:
     446             :   friend class Name;
     447             :   friend class StringTableInsertionKey;
     448             :   friend class InternalizedStringKey;
     449             : 
     450             :   static Handle<String> SlowFlatten(Handle<ConsString> cons,
     451             :                                     PretenureFlag tenure);
     452             : 
     453             :   // Slow case of String::Equals.  This implementation works on any strings
     454             :   // but it is most efficient on strings that are almost flat.
     455             :   bool SlowEquals(String* other);
     456             : 
     457             :   static bool SlowEquals(Handle<String> one, Handle<String> two);
     458             : 
     459             :   // Slow case of AsArrayIndex.
     460             :   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
     461             : 
     462             :   // Compute and set the hash code.
     463             :   uint32_t ComputeAndSetHash();
     464             : 
     465             :   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
     466             : };
     467             : 
     468             : // The SeqString abstract class captures sequential string values.
     469             : class SeqString : public String {
     470             :  public:
     471             :   DECL_CAST(SeqString)
     472             : 
     473             :   // Layout description.
     474             :   static const int kHeaderSize = String::kSize;  // Same as String::kSize: no extra header fields are declared here.
     475             : 
     476             :   // Truncate the string in-place if possible and return the result.
     477             :   // In case of new_length == 0, the empty string is returned without
     478             :   // truncating the original string.
     479             :   MUST_USE_RESULT static Handle<String> Truncate(Handle<SeqString> string,
     480             :                                                  int new_length);
     481             : 
     482             :  private:
     483             :   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString);
     484             : };
     485             : 
     486             : // The SeqOneByteString class captures sequential one-byte string objects.
     487             : // Each character in the SeqOneByteString is a one-byte character.
     488             : class SeqOneByteString : public SeqString {
     489             :  public:
     490             :   static const bool kHasOneByteEncoding = true;
     491             : 
     492             :   // Dispatched behavior.
     493             :   inline uint16_t SeqOneByteStringGet(int index);
     494             :   inline void SeqOneByteStringSet(int index, uint16_t value);
     495             : 
     496             :   // Get the address of the characters in this string.
     497             :   inline Address GetCharsAddress();
     498             : 
     499             :   inline uint8_t* GetChars();
     500             : 
     501             :   // Clear uninitialized padding space. This ensures that the snapshot content
     502             :   // is deterministic.
     503             :   void clear_padding();
     504             : 
     505             :   DECL_CAST(SeqOneByteString)
     506             : 
     507             :   // Garbage collection support.  This method is called by the
     508             :   // garbage collector to compute the actual size of a SeqOneByteString
     509             :   // instance.
     510             :   inline int SeqOneByteStringSize(InstanceType instance_type);
     511             : 
     512             :   // Computes the size for a SeqOneByteString instance of a given length.
     513             :   static int SizeFor(int length) {
     514   561910868 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
     515             :   }
     516             : 
     517             :   // Maximal memory usage for a single sequential one-byte string.
     518             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxLength + kHeaderSize);
     519             :   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);  // kMaxSize minus the header must still cover kMaxLength bytes.
     520             : 
     521             :   class BodyDescriptor;
     522             :   // No weak fields.
     523             :   typedef BodyDescriptor BodyDescriptorWeak;
     524             : 
     525             :  private:
     526             :   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqOneByteString);
     527             : };
     528             : 
     529             : // The SeqTwoByteString class captures sequential two-byte string objects.
     530             : // Each character in a SeqTwoByteString is a two-byte uint16_t.
     531             : class SeqTwoByteString : public SeqString {
     532             :  public:
     533             :   static const bool kHasOneByteEncoding = false;
     534             : 
     535             :   // Dispatched behavior.
     536             :   inline uint16_t SeqTwoByteStringGet(int index);
     537             :   inline void SeqTwoByteStringSet(int index, uint16_t value);
     538             : 
     539             :   // Get the address of the characters in this string.
     540             :   inline Address GetCharsAddress();
     541             : 
     542             :   inline uc16* GetChars();
     543             : 
     544             :   // Clear uninitialized padding space. This ensures that the snapshot content
     545             :   // is deterministic.
     546             :   void clear_padding();
     547             : 
     548             :   // For regexp code.
     549             :   const uint16_t* SeqTwoByteStringGetData(unsigned start);
     550             : 
     551             :   DECL_CAST(SeqTwoByteString)
     552             : 
     553             :   // Garbage collection support.  This method is called by the
     554             :   // garbage collector to compute the actual size of a SeqTwoByteString
     555             :   // instance.
     556             :   inline int SeqTwoByteStringSize(InstanceType instance_type);
     557             : 
     558             :   // Computes the pointer-aligned size of a SeqTwoByteString of a given length.
     559             :   static int SizeFor(int length) {
     560    80759506 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
     561             :   }
     562             : 
     563             :   // Maximal memory usage for a single sequential two-byte string.
     564             :   static const int kMaxSize =
     565             :       OBJECT_POINTER_ALIGN(kMaxLength * 2 + kHeaderSize);
     566             :   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
     567             :                 String::kMaxLength);
     568             : 
     569             :   class BodyDescriptor;
     570             :   // No weak fields.
     571             :   typedef BodyDescriptor BodyDescriptorWeak;
     572             : 
     573             :  private:
     574             :   DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString);
     575             : };
     576             : 
     577             : // The ConsString class describes string values built by using the
     578             : // addition operator on strings.  A ConsString is a pair where the
     579             : // first and second components are pointers to other string values.
     580             : // One or both components of a ConsString can be pointers to other
     581             : // ConsStrings, creating a binary tree of ConsStrings where the leaves
     582             : // are non-ConsString string values.  The string value represented by
     583             : // a ConsString can be obtained by concatenating the leaf string
     584             : // values in a left-to-right depth-first traversal of the tree.
     585             : class ConsString : public String {
     586             :  public:
     587             :   // First string of the cons cell.
     588             :   inline String* first();
     589             :   // Doesn't check that the result is a string, even in debug mode.  This is
     590             :   // useful during GC where the mark bits confuse the checks.
     591             :   inline Object* unchecked_first();
     592             :   inline void set_first(String* first,
     593             :                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     594             : 
     595             :   // Second string of the cons cell.
     596             :   inline String* second();
     597             :   // Doesn't check that the result is a string, even in debug mode.  This is
     598             :   // useful during GC where the mark bits confuse the checks.
     599             :   inline Object* unchecked_second();
     600             :   inline void set_second(String* second,
     601             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     602             : 
     603             :   // Dispatched behavior.
     604             :   V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
     605             : 
     606             :   DECL_CAST(ConsString)
     607             : 
     608             :   // Layout: pointer-aligned String header, then the first and second slots.
     609             :   static const int kFirstOffset = POINTER_SIZE_ALIGN(String::kSize);
     610             :   static const int kSecondOffset = kFirstOffset + kPointerSize;
     611             :   static const int kSize = kSecondOffset + kPointerSize;
     612             : 
     613             :   // Minimum length for a cons string.
     614             :   static const int kMinLength = 13;
     615             : 
     616             :   typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
     617             :       BodyDescriptor;
     618             :   // No weak fields.
     619             :   typedef BodyDescriptor BodyDescriptorWeak;
     620             : 
     621             :   DECL_VERIFIER(ConsString)
     622             : 
     623             :  private:
     624             :   DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
     625             : };
     626             : 
     627             : // The ThinString class describes string objects that are just references
     628             : // to another string object. They are used for in-place internalization when
     629             : // the original string cannot actually be internalized in-place: in these
     630             : // cases, the original string is converted to a ThinString pointing at its
     631             : // internalized version (which is allocated as a new object).
     632             : // In terms of memory layout and most algorithms operating on strings,
     633             : // ThinStrings can be thought of as "one-part cons strings".
     634             : class ThinString : public String {
     635             :  public:
     636             :   // Actual string that this ThinString refers to.
     637             :   inline String* actual() const;
     638             :   inline void set_actual(String* s,
     639             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     640             : 
     641             :   V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
     642             : 
     643             :   DECL_CAST(ThinString)
     644             :   DECL_VERIFIER(ThinString)
     645             : 
     646             :   // Layout: String header followed by a single pointer-sized "actual" slot.
     647             :   static const int kActualOffset = String::kSize;  // NOTE(review): not wrapped in POINTER_SIZE_ALIGN, unlike ConsString/SlicedString; presumably String::kSize is already aligned - confirm.
     648             :   static const int kSize = kActualOffset + kPointerSize;
     649             : 
     650             :   typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor;
     651             :   // No weak fields.
     652             :   typedef BodyDescriptor BodyDescriptorWeak;
     653             : 
     654             :  private:
     655             :   DISALLOW_COPY_AND_ASSIGN(ThinString);  // NOTE(review): the other String subclasses in this file use DISALLOW_IMPLICIT_CONSTRUCTORS - confirm this difference is intended.
     656             : };
     657             : 
     658             : // The SlicedString class describes strings that are substrings of another
     659             : // sequential string.  The motivation is to save time and memory when creating
     660             : // a substring.  A SlicedString is described as a pointer to the parent,
     661             : // the offset from the start of the parent string and the length.  Using
     662             : // a SlicedString therefore requires unpacking of the parent string and
     663             : // adding the offset to the start address.  Substrings of a SlicedString
     664             : // are not nested since the double indirection is simplified when creating
     665             : // such a substring.
     666             : // Currently missing features are:
     667             : //  - handling externalized parent strings
     668             : //  - external strings as parent
     669             : //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
     670             : class SlicedString : public String {
     671             :  public:
     672             :   inline String* parent();
     673             :   inline void set_parent(String* parent,
     674             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     675             :   inline int offset() const;
     676             :   inline void set_offset(int offset);
     677             : 
     678             :   // Dispatched behavior.
     679             :   V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
     680             : 
     681             :   DECL_CAST(SlicedString)
     682             : 
     683             :   // Layout: pointer-aligned String header, then the parent and offset slots.
     684             :   static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
     685             :   static const int kOffsetOffset = kParentOffset + kPointerSize;
     686             :   static const int kSize = kOffsetOffset + kPointerSize;
     687             : 
     688             :   // Minimum length for a sliced string.
     689             :   static const int kMinLength = 13;
     690             : 
     691             :   typedef FixedBodyDescriptor<kParentOffset, kOffsetOffset + kPointerSize,
     692             :                               kSize>
     693             :       BodyDescriptor;
     694             :   // No weak fields.
     695             :   typedef BodyDescriptor BodyDescriptorWeak;
     696             : 
     697             :   DECL_VERIFIER(SlicedString)
     698             : 
     699             :  private:
     700             :   DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
     701             : };
     702             : 
     703             : // The ExternalString class describes string values that are backed by
     704             : // a string resource that lies outside the V8 heap.  ExternalStrings
     705             : // consist of the length field common to all strings and a pointer to
     706             : // the external resource.  It is important to ensure (externally) that the
     707             : // resource is not deallocated while the ExternalString is live in the
     708             : // V8 heap.
     709             : //
     710             : // The API expects that all ExternalStrings are created through the
     711             : // API.  Therefore, ExternalStrings should not be used internally.
     712             : class ExternalString : public String {
     713             :  public:
     714             :   DECL_CAST(ExternalString)
     715             : 
     716             :   // Layout: kShortSize ends right after the resource slot (no data slot).
     717             :   static const int kResourceOffset = POINTER_SIZE_ALIGN(String::kSize);
     718             :   static const int kShortSize = kResourceOffset + kPointerSize;
     719             :   static const int kResourceDataOffset = kResourceOffset + kPointerSize;
     720             :   static const int kSize = kResourceDataOffset + kPointerSize;
     721             : 
     722             :   // Return whether external string is short (data pointer is not cached).
     723             :   inline bool is_short();
     724             : 
     725             :   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
     726             : 
     727             :  private:
     728             :   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalString);
     729             : };
     730             : 
     731             : // The ExternalOneByteString class is an external string backed by a
     732             : // one-byte string.
     733             : class ExternalOneByteString : public ExternalString {
     734             :  public:
     735             :   static const bool kHasOneByteEncoding = true;
     736             : 
     737             :   typedef v8::String::ExternalOneByteStringResource Resource;
     738             : 
     739             :   // The underlying resource.
     740             :   inline const Resource* resource();
     741             :   inline void set_resource(const Resource* buffer);
     742             : 
     743             :   // Update the pointer cache to the external character array.
     744             :   // The cached pointer is always valid, as the external character array does
     745             :   // not move during lifetime.  Deserialization is the only exception, after
     746             :   // which the pointer cache has to be refreshed.
     747             :   inline void update_data_cache();
     748             : 
     749             :   inline const uint8_t* GetChars();
     750             : 
     751             :   // Dispatched behavior.
     752             :   inline uint16_t ExternalOneByteStringGet(int index);
     753             : 
     754             :   DECL_CAST(ExternalOneByteString)
     755             : 
     756             :   class BodyDescriptor;
     757             :   // No weak fields.
     758             :   typedef BodyDescriptor BodyDescriptorWeak;
     759             : 
     760             :  private:
     761             :   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalOneByteString);
     762             : };
     763             : 
     764             : // The ExternalTwoByteString class is an external string backed by a UTF-16
     765             : // encoded string.
     766             : class ExternalTwoByteString : public ExternalString {
     767             :  public:
     768             :   static const bool kHasOneByteEncoding = false;
     769             : 
     770             :   typedef v8::String::ExternalStringResource Resource;
     771             : 
     772             :   // The underlying string resource.
     773             :   inline const Resource* resource();
     774             :   inline void set_resource(const Resource* buffer);
     775             : 
     776             :   // Update the pointer cache to the external character array.
     777             :   // The cached pointer is always valid, as the external character array does
     778             :   // not move during lifetime.  Deserialization is the only exception, after
     779             :   // which the pointer cache has to be refreshed.
     780             :   inline void update_data_cache();
     781             : 
     782             :   inline const uint16_t* GetChars();
     783             : 
     784             :   // Dispatched behavior.
     785             :   inline uint16_t ExternalTwoByteStringGet(int index);
     786             : 
     787             :   // For regexp code.
     788             :   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
     789             : 
     790             :   DECL_CAST(ExternalTwoByteString)
     791             : 
     792             :   class BodyDescriptor;
     793             :   // No weak fields.
     794             :   typedef BodyDescriptor BodyDescriptorWeak;
     795             : 
     796             :  private:
     797             :   DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString);
     798             : };
     799             : 
     800             : // A flat string reader provides random access to the contents of a
     801             : // string independent of the character width of the string.  The handle
     802             : // must remain valid for as long as the reader is being used.
     803     2634567 : class FlatStringReader : public Relocatable {
     804             :  public:
     805             :   FlatStringReader(Isolate* isolate, Handle<String> str);
     806             :   FlatStringReader(Isolate* isolate, Vector<const char> input);
     807             :   void PostGarbageCollection();
     808             :   inline uc32 Get(int index);
     809             :   template <typename Char>
     810             :   inline Char Get(int index);
     811  2711419590 :   int length() { return length_; }
     812             : 
     813             :  private:
     814             :   String** str_;
     815             :   bool is_one_byte_;
     816             :   int length_;
     817             :   const void* start_;  // Untyped; presumably read as one- or two-byte data depending on is_one_byte_ - confirm.
     818             : };
     819             : 
     820             : // This maintains an off-stack representation of the stack frames required
     821             : // to traverse a ConsString, allowing an entirely iterative and restartable
     822             : // traversal of the entire string.
     823             : class ConsStringIterator {
     824             :  public:
     825             :   inline ConsStringIterator() {}  // NOTE: initializes no members (not even depth_); call Reset() before Next().
     826             :   inline explicit ConsStringIterator(ConsString* cons_string, int offset = 0) {
     827             :     Reset(cons_string, offset);
     828             :   }
     829             :   inline void Reset(ConsString* cons_string, int offset = 0) {
     830    13936302 :     depth_ = 0;
     831             :     // With a null cons_string depth_ stays 0, so Next() always returns nullptr.
     832    13929225 :     if (cons_string == nullptr) return;
     833       47184 :     Initialize(cons_string, offset);
     834             :   }
     835             :   // Returns nullptr when complete; *offset_out is reset to 0 on every call.
     836             :   inline String* Next(int* offset_out) {
     837    51427127 :     *offset_out = 0;
     838    51427127 :     if (depth_ == 0) return nullptr;
     839    43511997 :     return Continue(offset_out);
     840             :   }
     841             : 
     842             :  private:
     843             :   static const int kStackSize = 32;
     844             :   // Use a mask instead of doing modulo operations for stack wrapping.
     845             :   static const int kDepthMask = kStackSize - 1;
     846             :   static_assert(base::bits::IsPowerOfTwo(kStackSize),
     847             :                 "kStackSize must be power of two");
     848             :   static inline int OffsetForDepth(int depth);
     849             : 
     850             :   inline void PushLeft(ConsString* string);
     851             :   inline void PushRight(ConsString* string);
     852             :   inline void AdjustMaximumDepth();
     853             :   inline void Pop();
     854    83427706 :   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
     855             :   void Initialize(ConsString* cons_string, int offset);
     856             :   String* Continue(int* offset_out);
     857             :   String* NextLeaf(bool* blew_stack);
     858             :   String* Search(int* offset_out);
     859             : 
     860             :   // Stack must always contain only frames for which right traversal
     861             :   // has not yet been performed.
     862             :   ConsString* frames_[kStackSize];
     863             :   ConsString* root_;
     864             :   int depth_;
     865             :   int maximum_depth_;
     866             :   int consumed_;
     867             :   DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
     868             : };
     869             : 
     870             : class StringCharacterStream {
     871             :  public:
     872             :   inline explicit StringCharacterStream(String* string, int offset = 0);
     873             :   inline uint16_t GetNext();
     874             :   inline bool HasMore();
     875             :   inline void Reset(String* string, int offset = 0);
     876             :   inline void VisitOneByteString(const uint8_t* chars, int length);
     877             :   inline void VisitTwoByteString(const uint16_t* chars, int length);
     878             : 
     879             :  private:
     880             :   ConsStringIterator iter_;
     881             :   bool is_one_byte_;
     882             :   union {  // The two pointers share storage; presumably is_one_byte_ selects the active one - confirm.
     883             :     const uint8_t* buffer8_;
     884             :     const uint16_t* buffer16_;
     885             :   };
     886             :   const uint8_t* end_;  // NOTE(review): byte-typed even though buffer16_ exists; presumably a raw end address - confirm.
     887             :   DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
     888             : };
     889             : 
     890             : }  // namespace internal
     891             : }  // namespace v8
     892             : 
     893             : #include "src/objects/object-macros-undef.h"
     894             : 
     895             : #endif  // V8_OBJECTS_STRING_H_

Generated by: LCOV version 1.10