LCOV - code coverage report
Current view: top level - src/objects - string.h (source / functions) Hit Total Coverage
Test: app.info Lines: 41 50 82.0 %
Date: 2019-04-19 Functions: 2 12 16.7 %

          Line data    Source code
       1             : // Copyright 2017 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #ifndef V8_OBJECTS_STRING_H_
       6             : #define V8_OBJECTS_STRING_H_
       7             : 
       8             : #include "src/base/bits.h"
       9             : #include "src/base/export-template.h"
      10             : #include "src/objects/instance-type.h"
      11             : #include "src/objects/name.h"
      12             : #include "src/objects/smi.h"
      13             : #include "src/unicode-decoder.h"
      14             : 
      15             : // Has to be the last include (doesn't have include guards):
      16             : #include "src/objects/object-macros.h"
      17             : 
      18             : namespace v8 {
      19             : namespace internal {
      20             : 
      21             : enum InstanceType : uint16_t;
      22             : 
      23             : enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
      24             : enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
      25             : 
      26             : // The characteristics of a string are stored in its map.  Retrieving these
      27             : // few bits of information is moderately expensive, involving two memory
      28             : // loads where the second is dependent on the first.  To improve efficiency
      29             : // the shape of the string is given its own class so that it can be retrieved
      30             : // once and used for several string operations.  A StringShape is small enough
      31             : // to be passed by value and is immutable, but be aware that flattening a
      32             : // string can potentially alter its shape.  Also be aware that a GC caused by
      33             : // something else can alter the shape of a string due to ConsString
      34             : // shortcutting.  Keeping these restrictions in mind has proven to be error-
      35             : // prone and so we no longer put StringShapes in variables unless there is a
      36             : // concrete performance benefit at that particular point in the code.
      37             : class StringShape {
      38             :  public:
      39             :   inline explicit StringShape(const String s);
      40             :   inline explicit StringShape(Map s);
      41             :   inline explicit StringShape(InstanceType t);
      42             :   inline bool IsSequential();
      43             :   inline bool IsExternal();
      44             :   inline bool IsCons();
      45             :   inline bool IsSliced();
      46             :   inline bool IsThin();
      47             :   inline bool IsIndirect();
      48             :   inline bool IsExternalOneByte();
      49             :   inline bool IsExternalTwoByte();
      50             :   inline bool IsSequentialOneByte();
      51             :   inline bool IsSequentialTwoByte();
      52             :   inline bool IsInternalized();
      53             :   inline StringRepresentationTag representation_tag();
      54             :   inline uint32_t encoding_tag();
      55             :   inline uint32_t full_representation_tag();
      56             : #ifdef DEBUG
      57             :   inline uint32_t type() { return type_; }
      58             :   inline void invalidate() { valid_ = false; }
      59             :   inline bool valid() { return valid_; }
      60             : #else
      61             :   inline void invalidate() {}
      62             : #endif
      63             : 
      64             :  private:
      65             :   uint32_t type_;
      66             : #ifdef DEBUG
      67             :   inline void set_valid() { valid_ = true; }
      68             :   bool valid_;
      69             : #else
      70             :   inline void set_valid() {}
      71             : #endif
      72             : };
      73             : 
      74             : // The String abstract class captures JavaScript string values:
      75             : //
      76             : // Ecma-262:
      77             : //  4.3.16 String Value
      78             : //    A string value is a member of the type String and is a finite
      79             : //    ordered sequence of zero or more 16-bit unsigned integer values.
      80             : //
      81             : // All string values have a length field.
      82             : class String : public Name {
      83             :  public:
      84             :   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
      85             : 
      86             :   // Representation of the flat content of a String.
      87             :   // A non-flat string doesn't have flat content.
      88             :   // A flat string has content that's encoded as a sequence of either
      89             :   // one-byte chars or two-byte UC16.
      90             :   // Returned by String::GetFlatContent().
      91             :   class FlatContent {
      92             :    public:
      93             :     // Returns true if the string is flat and this structure contains content.
      94             :     bool IsFlat() const { return state_ != NON_FLAT; }
      95             :     // Returns true if the structure contains one-byte content.
      96     1495262 :     bool IsOneByte() const { return state_ == ONE_BYTE; }
      97             :     // Returns true if the structure contains two-byte content.
      98             :     bool IsTwoByte() const { return state_ == TWO_BYTE; }
      99             : 
     100             :     // Return the one byte content of the string. Only use if IsOneByte()
     101             :     // returns true.
     102             :     Vector<const uint8_t> ToOneByteVector() const {
     103             :       DCHECK_EQ(ONE_BYTE, state_);
     104    32249155 :       return Vector<const uint8_t>(onebyte_start, length_);
     105             :     }
     106             :     // Return the two-byte content of the string. Only use if IsTwoByte()
     107             :     // returns true.
     108             :     Vector<const uc16> ToUC16Vector() const {
     109             :       DCHECK_EQ(TWO_BYTE, state_);
     110     6051705 :       return Vector<const uc16>(twobyte_start, length_);
     111             :     }
     112             : 
     113             :     uc16 Get(int i) const {
     114             :       DCHECK(i < length_);
     115             :       DCHECK(state_ != NON_FLAT);
     116    62215639 :       if (state_ == ONE_BYTE) return onebyte_start[i];
     117    15286027 :       return twobyte_start[i];
     118             :     }
     119             : 
     120             :     bool UsesSameString(const FlatContent& other) const {
     121             :       return onebyte_start == other.onebyte_start;
     122             :     }
     123             : 
     124             :    private:
     125             :     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
     126             : 
     127             :     // Constructors only used by String::GetFlatContent().
     128             :     explicit FlatContent(const uint8_t* start, int length)
     129             :         : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
     130             :     explicit FlatContent(const uc16* start, int length)
     131             :         : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
     132             :     FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
     133             : 
     134             :     union {
     135             :       const uint8_t* onebyte_start;
     136             :       const uc16* twobyte_start;
     137             :     };
     138             :     int length_;
     139             :     State state_;
     140             : 
     141             :     friend class String;
     142             :     friend class IterableSubString;
     143             :   };
     144             : 
     145             :   template <typename Char>
     146             :   V8_INLINE Vector<const Char> GetCharVector(
     147             :       const DisallowHeapAllocation& no_gc);
     148             : 
     149             :   // Get and set the length of the string.
     150             :   inline int length() const;
     151             :   inline void set_length(int value);
     152             : 
     153             :   // Get and set the length of the string using acquire loads and release
     154             :   // stores.
     155             :   inline int synchronized_length() const;
     156             :   inline void synchronized_set_length(int value);
     157             : 
     158             :   // Returns whether this string has only one-byte chars, i.e. all of them can
     159             :   // be one-byte encoded.  This might be the case even if the string is
     160             :   // two-byte.  Such strings may appear when the embedder prefers
     161             :   // two-byte external representations even for one-byte data.
     162             :   inline bool IsOneByteRepresentation() const;
     163             :   inline bool IsTwoByteRepresentation() const;
     164             : 
     165             :   // Cons and slices have an encoding flag that may not represent the actual
     166             :   // encoding of the underlying string.  This is taken into account here.
     167             :   // This function is static because that helps it get inlined.
     168             :   // Requires: string.IsFlat()
     169             :   static inline bool IsOneByteRepresentationUnderneath(String string);
     170             : 
     171             :   // Set an individual two-byte char in the string.
     172             :   inline void Set(int index, uint16_t value);
     173             :   // Get individual two byte char in the string.  Repeated calls
     174             :   // to this method are not efficient unless the string is flat.
     175             :   V8_INLINE uint16_t Get(int index);
     176             : 
     177             :   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
     178             :   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
     179             : 
     180             :   // Flattens the string.  Checks first inline to see if it is
     181             :   // necessary.  Does nothing if the string is not a cons string.
     182             :   // Flattening allocates a sequential string with the same data as
     183             :   // the given string and mutates the cons string to a degenerate
     184             :   // form, where the first component is the new sequential string and
     185             :   // the second component is the empty string.  If allocation fails,
     186             :   // this function returns a failure.  If flattening succeeds, this
     187             :   // function returns the sequential string that is now the first
     188             :   // component of the cons string.
     189             :   //
     190             :   // Degenerate cons strings are handled specially by the garbage
     191             :   // collector (see IsShortcutCandidate).
     192             : 
     193             :   static inline Handle<String> Flatten(
     194             :       Isolate* isolate, Handle<String> string,
     195             :       AllocationType allocation = AllocationType::kYoung);
     196             : 
     197             :   // Tries to return the content of a flat string as a structure holding either
     198             :   // a flat vector of char or of uc16.
     199             :   // If the string isn't flat, and therefore doesn't have flat content, the
     200             :   // returned structure will report so, and can't provide a vector of either
     201             :   // kind.
     202             :   V8_EXPORT_PRIVATE FlatContent
     203             :   GetFlatContent(const DisallowHeapAllocation& no_gc);
     204             : 
     205             :   // Returns the parent of a sliced string or first part of a flat cons string.
     206             :   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
     207             :   inline String GetUnderlying();
     208             : 
     209             :   // String relational comparison, implemented according to ES6 section 7.2.11
     210             :   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
     211             :   // simple lexicographic ordering on sequences of code unit values. There is no
     212             :   // attempt to use the more complex, semantically oriented definitions of
     213             :   // character or string equality and collating order defined in the Unicode
     214             :   // specification. Therefore String values that are canonically equal according
     215             :   // to the Unicode standard could test as unequal. In effect this algorithm
     216             :   // assumes that both Strings are already in normalized form. Also, note that
     217             :   // for strings containing supplementary characters, lexicographic ordering on
     218             :   // sequences of UTF-16 code unit values differs from that on sequences of code
     219             :   // point values.
     220             :   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
     221             :                                                         Handle<String> x,
     222             :                                                         Handle<String> y);
     223             : 
     224             :   // Perform ES6 21.1.3.8, including checking arguments.
     225             :   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
     226             :                         Handle<Object> search, Handle<Object> position);
     227             :   // Perform string match of pattern on subject, starting at start index.
     228             :   // Caller must ensure that 0 <= start_index <= receiver->length(), as this
     229             :   // does not check any arguments.
     230             :   static int IndexOf(Isolate* isolate, Handle<String> receiver,
     231             :                      Handle<String> search, int start_index);
     232             : 
     233             :   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
     234             :                             Handle<Object> search, Handle<Object> position);
     235             : 
     236             :   // Encapsulates logic related to a match and its capture groups as required
     237             :   // by GetSubstitution.
     238        3912 :   class Match {
     239             :    public:
     240             :     virtual Handle<String> GetMatch() = 0;
     241             :     virtual Handle<String> GetPrefix() = 0;
     242             :     virtual Handle<String> GetSuffix() = 0;
     243             : 
     244             :     // A named capture can be invalid (if it is not specified in the pattern),
     245             :     // unmatched (specified but not matched in the current string), and matched.
     246             :     enum CaptureState { INVALID, UNMATCHED, MATCHED };
     247             : 
     248             :     virtual int CaptureCount() = 0;
     249             :     virtual bool HasNamedCaptures() = 0;
     250             :     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
     251             :     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
     252             :                                                 CaptureState* state) = 0;
     253             : 
     254        3912 :     virtual ~Match() = default;
     255             :   };
     256             : 
     257             :   // ES#sec-getsubstitution
     258             :   // GetSubstitution(matched, str, position, captures, replacement)
     259             :   // Expand the $-expressions in the string and return a new string with
     260             :   // the result.
     261             :   // A {start_index} can be passed to specify where to start scanning the
     262             :   // replacement string.
     263             :   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
     264             :       Isolate* isolate, Match* match, Handle<String> replacement,
     265             :       int start_index = 0);
     266             : 
     267             :   // String equality operations.
     268             :   inline bool Equals(String other);
     269             :   inline static bool Equals(Isolate* isolate, Handle<String> one,
     270             :                             Handle<String> two);
     271             :   V8_EXPORT_PRIVATE bool IsUtf8EqualTo(Vector<const char> str,
     272             :                                        bool allow_prefix_match = false);
     273             : 
     274             :   // Dispatches to Is{One,Two}ByteEqualTo.
     275             :   template <typename Char>
     276             :   bool IsEqualTo(Vector<const Char> str);
     277             : 
     278             :   V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const uint8_t> str);
     279             :   bool IsTwoByteEqualTo(Vector<const uc16> str);
     280             : 
     281             :   // Return a UTF8 representation of the string.  The string is null
     282             :   // terminated but may optionally contain nulls.  Length is returned
     283             :   // in length_output if length_output is not a null pointer. The string
     284             :   // should be nearly flat, otherwise the performance of this method may
     285             :   // be very slow (quadratic in the length).  Setting robustness_flag to
     286             :   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust. This means it
     287             :   // handles unexpected data without causing assert failures and it does not
     288             :   // do any heap allocations.  This is useful when printing stack traces.
     289             :   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
     290             :                                     RobustnessFlag robustness_flag, int offset,
     291             :                                     int length, int* length_output = nullptr);
     292             :   V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
     293             :       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
     294             :       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
     295             :       int* length_output = nullptr);
     296             : 
     297             :   bool ComputeArrayIndex(uint32_t* index);
     298             : 
     299             :   // Externalization.
     300             :   V8_EXPORT_PRIVATE bool MakeExternal(
     301             :       v8::String::ExternalStringResource* resource);
     302             :   V8_EXPORT_PRIVATE bool MakeExternal(
     303             :       v8::String::ExternalOneByteStringResource* resource);
     304             :   bool SupportsExternalization();
     305             : 
     306             :   // Conversion.
     307             :   inline bool AsArrayIndex(uint32_t* index);
     308             :   uint32_t inline ToValidIndex(Object number);
     309             : 
     310             :   // Trimming.
     311             :   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
     312             :   static Handle<String> Trim(Isolate* isolate, Handle<String> string,
     313             :                              TrimMode mode);
     314             : 
     315             :   DECL_CAST(String)
     316             : 
     317             :   V8_EXPORT_PRIVATE void PrintOn(FILE* out);
     318             : 
     319             :   // For use during stack traces.  Performs rudimentary sanity check.
     320             :   bool LooksValid();
     321             : 
     322             :   // Dispatched behavior.
     323             :   void StringShortPrint(StringStream* accumulator, bool show_details = true);
     324             :   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
     325             : #if defined(DEBUG) || defined(OBJECT_PRINT)
     326             :   char* ToAsciiArray();
     327             : #endif
     328             :   DECL_PRINTER(String)
     329             :   DECL_VERIFIER(String)
     330             : 
     331             :   inline bool IsFlat();
     332             : 
     333             :   DEFINE_FIELD_OFFSET_CONSTANTS(Name::kHeaderSize,
     334             :                                 TORQUE_GENERATED_STRING_FIELDS)
     335             : 
     336             :   static const int kHeaderSize = kSize;
     337             : 
     338             :   // Max char codes.
     339             :   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
     340             :   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
     341             :   static const int kMaxUtf16CodeUnit = 0xffff;
     342             :   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
     343             :   static const uc32 kMaxCodePoint = 0x10ffff;
     344             : 
     345             :   // Maximal string length.
     346             :   // The max length is different on 32 and 64 bit platforms. Max length for a
     347             :   // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
     348             :   // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
     349             :   // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
     350             :   // each char needs two bytes, subtract 24 bytes for the string header size.
     351             : 
     352             :   // See include/v8.h for the definition.
     353             :   static const int kMaxLength = v8::String::kMaxLength;
     354             :   static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kHeaderSize),
     355             :                 "Unexpected max String length");
     356             : 
     357             :   // Max length for computing hash. For strings longer than this limit the
     358             :   // string length is used as the hash value.
     359             :   static const int kMaxHashCalcLength = 16383;
     360             : 
     361             :   // Limit for truncation in short printing.
     362             :   static const int kMaxShortPrintLength = 1024;
     363             : 
     364             :   // Helper function for flattening strings.
     365             :   template <typename sinkchar>
     366             :   EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
     367             :   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
     368             : 
     369             :   // The return value may point to the first aligned word containing the first
     370             :   // non-one-byte character, rather than directly to the non-one-byte character.
     371             :   // If the return value is >= the passed length, the entire string was
     372             :   // one-byte.
     373    10403179 :   static inline int NonAsciiStart(const char* chars, int length) {
     374             :     const char* start = chars;
     375    10403179 :     const char* limit = chars + length;
     376             : 
     377    10403179 :     if (length >= kIntptrSize) {
     378             :       // Check unaligned bytes.
     379    10601970 :       while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
     380     4452491 :         if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     381         280 :           return static_cast<int>(chars - start);
     382             :         }
     383     4452211 :         ++chars;
     384             :       }
     385             :       // Check aligned words.
     386             :       DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
     387             :       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
     388   888537138 :       while (chars + sizeof(uintptr_t) <= limit) {
     389   886841479 :         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
     390        1609 :           return static_cast<int>(chars - start);
     391             :         }
     392             :         chars += sizeof(uintptr_t);
     393             :       }
     394             :     }
     395             :     // Check remaining unaligned bytes.
     396    83541162 :     while (chars < limit) {
     397    36573492 :       if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
     398        3556 :         return static_cast<int>(chars - start);
     399             :       }
     400    36569936 :       ++chars;
     401             :     }
     402             : 
     403    10397734 :     return static_cast<int>(chars - start);
     404             :   }
     405             : 
     406             :   static inline bool IsAscii(const char* chars, int length) {
     407      120571 :     return NonAsciiStart(chars, length) >= length;
     408             :   }
     409             : 
     410             :   static inline bool IsAscii(const uint8_t* chars, int length) {
     411           0 :     return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
     412             :            length;
     413             :   }
     414             : 
     415             :   static inline int NonOneByteStart(const uc16* chars, int length) {
     416     1867743 :     const uc16* limit = chars + length;
     417             :     const uc16* start = chars;
     418  1527882154 :     while (chars < limit) {
     419  1526629287 :       if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
     420  1526014411 :       ++chars;
     421             :     }
     422     1252867 :     return static_cast<int>(chars - start);
     423             :   }
     424             : 
     425             :   static inline bool IsOneByte(const uc16* chars, int length) {
     426             :     return NonOneByteStart(chars, length) >= length;
     427             :   }
     428             : 
     429             :   template <class Visitor>
     430             :   static inline ConsString VisitFlat(Visitor* visitor, String string,
     431             :                                      int offset = 0);
     432             : 
     433             :   static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
     434             :                                               Handle<String> string,
     435             :                                               bool include_ending_line);
     436             : 
     437             :  private:
     438             :   friend class Name;
     439             :   friend class StringTableInsertionKey;
     440             :   friend class InternalizedStringKey;
     441             : 
     442             :   V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
     443             :       Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);
     444             : 
     445             :   // Slow case of String::Equals.  This implementation works on any strings
     446             :   // but it is most efficient on strings that are almost flat.
     447             :   V8_EXPORT_PRIVATE bool SlowEquals(String other);
     448             : 
     449             :   V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
     450             :                                            Handle<String> two);
     451             : 
     452             :   // Slow case of AsArrayIndex.
     453             :   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
     454             : 
     455             :   // Compute and set the hash code.
     456             :   V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
     457             : 
     458           0 :   OBJECT_CONSTRUCTORS(String, Name);
     459             : };
     460             : 
     461             : // clang-format off
     462             : extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
     463             : void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
     464             : // clang-format on
     465             : 
     466             : class SubStringRange {
     467             :  public:
     468             :   inline SubStringRange(String string, const DisallowHeapAllocation& no_gc,
     469             :                         int first = 0, int length = -1);
     470             :   class iterator;
     471             :   inline iterator begin();
     472             :   inline iterator end();
     473             : 
     474             :  private:
     475             :   String string_;
     476             :   int first_;
     477             :   int length_;
     478             :   const DisallowHeapAllocation& no_gc_;
     479             : };
     480             : 
     481             : // The SeqString abstract class captures sequential string values.
     482             : class SeqString : public String {
     483             :  public:
     484             :   DECL_CAST(SeqString)
     485             : 
     486             :   // Truncate the string in-place if possible and return the result.
     487             :   // In case of new_length == 0, the empty string is returned without
     488             :   // truncating the original string.
     489             :   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
     490             :                                                        int new_length);
     491             : 
     492             :   OBJECT_CONSTRUCTORS(SeqString, String);
     493             : };
     494             : 
     495             : class InternalizedString : public String {
     496             :  public:
     497             :   DECL_CAST(InternalizedString)
     498             :   // TODO(neis): Possibly move some stuff from String here.
     499             : 
     500             :   OBJECT_CONSTRUCTORS(InternalizedString, String);
     501             : };
     502             : 
     503             : // The SeqOneByteString class captures sequential one-byte string objects.
     504             : // Each character in a SeqOneByteString is a one-byte character.
     505             : class SeqOneByteString : public SeqString {
     506             :  public:
     507             :   static const bool kHasOneByteEncoding = true;
     508             : 
     509             :   // Dispatched behavior.
     510             :   inline uint16_t SeqOneByteStringGet(int index);
     511             :   inline void SeqOneByteStringSet(int index, uint16_t value);
     512             : 
     513             :   // Get the address of the characters in this string.
     514             :   inline Address GetCharsAddress();
     515             : 
     516             :   inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
     517             : 
     518             :   // Clear uninitialized padding space. This ensures that the snapshot content
     519             :   // is deterministic.
     520             :   void clear_padding();
     521             : 
     522             :   DECL_CAST(SeqOneByteString)
     523             : 
     524             :   // Garbage collection support.  This method is called by the
     525             :   // garbage collector to compute the actual size of a SeqOneByteString
     526             :   // instance.
     527             :   inline int SeqOneByteStringSize(InstanceType instance_type);
     528             : 
     529             :   // Computes the size for a SeqOneByteString instance of a given length.
     530             :   static int SizeFor(int length) {
     531   354532294 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
     532             :   }
     533             : 
     534             :   // Maximal memory usage for a single sequential one-byte string.
     535             :   static const int kMaxCharsSize = kMaxLength;
     536             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     537             :   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
     538             : 
     539             :   class BodyDescriptor;
     540             : 
     541           0 :   OBJECT_CONSTRUCTORS(SeqOneByteString, SeqString);
     542             : };
     543             : 
     544             : // The SeqTwoByteString class captures sequential unicode string objects.
     545             : // Each character in the SeqTwoByteString is a two-byte uint16_t.
     546             : class SeqTwoByteString : public SeqString {
     547             :  public:
     548             :   static const bool kHasOneByteEncoding = false;
     549             : 
     550             :   // Dispatched behavior.
     551             :   inline uint16_t SeqTwoByteStringGet(int index);
     552             :   inline void SeqTwoByteStringSet(int index, uint16_t value);
     553             : 
     554             :   // Get the address of the characters in this string.
     555             :   inline Address GetCharsAddress();
     556             : 
     557             :   inline uc16* GetChars(const DisallowHeapAllocation& no_gc);
     558             : 
     559             :   // Clear uninitialized padding space. This ensures that the snapshot content
     560             :   // is deterministic.
     561             :   void clear_padding();
     562             : 
     563             :   DECL_CAST(SeqTwoByteString)
     564             : 
     565             :   // Garbage collection support.  This method is called by the
     566             :   // garbage collector to compute the actual size of a SeqTwoByteString
     567             :   // instance.
     568             :   inline int SeqTwoByteStringSize(InstanceType instance_type);
     569             : 
     570             :   // Computes the size for a SeqTwoByteString instance of a given length.
     571             :   static int SizeFor(int length) {
     572   199003715 :     return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
     573             :   }
     574             : 
     575             :   // Maximal memory usage for a single sequential two-byte string.
     576             :   static const int kMaxCharsSize = kMaxLength * 2;
     577             :   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
     578             :   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
     579             :                 String::kMaxLength);
     580             : 
     581             :   class BodyDescriptor;
     582             : 
     583           0 :   OBJECT_CONSTRUCTORS(SeqTwoByteString, SeqString);
     584             : };
     585             : 
     586             : // The ConsString class describes string values built by using the
     587             : // addition operator on strings.  A ConsString is a pair where the
     588             : // first and second components are pointers to other string values.
     589             : // One or both components of a ConsString can be pointers to other
     590             : // ConsStrings, creating a binary tree of ConsStrings where the leaves
     591             : // are non-ConsString string values.  The string value represented by
     592             : // a ConsString can be obtained by concatenating the leaf string
     593             : // values in a left-to-right depth-first traversal of the tree.
     594             : class ConsString : public String {
     595             :  public:
     596             :   // First string of the cons cell.
     597             :   inline String first();
     598             :   // Doesn't check that the result is a string, even in debug mode.  This is
     599             :   // useful during GC where the mark bits confuse the checks.
     600             :   inline Object unchecked_first();
     601             :   inline void set_first(Isolate* isolate, String first,
     602             :                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     603             : 
     604             :   // Second string of the cons cell.
     605             :   inline String second();
     606             :   // Doesn't check that the result is a string, even in debug mode.  This is
     607             :   // useful during GC where the mark bits confuse the checks.
     608             :   inline Object unchecked_second();
     609             :   inline void set_second(Isolate* isolate, String second,
     610             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     611             : 
     612             :   // Dispatched behavior.
     613             :   V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
     614             : 
     615             :   DECL_CAST(ConsString)
     616             : 
     617             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     618             :                                 TORQUE_GENERATED_CONS_STRING_FIELDS)
     619             : 
     620             :   // Minimum length for a cons string.
     621             :   static const int kMinLength = 13;
     622             : 
     623             :   using BodyDescriptor = FixedBodyDescriptor<kFirstOffset, kSize, kSize>;
     624             : 
     625             :   DECL_VERIFIER(ConsString)
     626             : 
     627           0 :   OBJECT_CONSTRUCTORS(ConsString, String);
     628             : };
     629             : 
     630             : // The ThinString class describes string objects that are just references
     631             : // to another string object. They are used for in-place internalization when
     632             : // the original string cannot actually be internalized in-place: in these
     633             : // cases, the original string is converted to a ThinString pointing at its
     634             : // internalized version (which is allocated as a new object).
     635             : // In terms of memory layout and most algorithms operating on strings,
     636             : // ThinStrings can be thought of as "one-part cons strings".
     637             : class ThinString : public String {
     638             :  public:
     639             :   // Actual string that this ThinString refers to.
     640             :   inline String actual() const;
     641             :   inline HeapObject unchecked_actual() const;
     642             :   inline void set_actual(String s,
     643             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     644             : 
     645             :   V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
     646             : 
     647             :   DECL_CAST(ThinString)
     648             :   DECL_VERIFIER(ThinString)
     649             : 
     650             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     651             :                                 TORQUE_GENERATED_THIN_STRING_FIELDS)
     652             : 
     653             :   using BodyDescriptor = FixedBodyDescriptor<kActualOffset, kSize, kSize>;
     654             : 
     655           0 :   OBJECT_CONSTRUCTORS(ThinString, String);
     656             : };
     657             : 
     658             : // The Sliced String class describes strings that are substrings of another
     659             : // sequential string.  The motivation is to save time and memory when creating
     660             : // a substring.  A Sliced String is described as a pointer to the parent,
     661             : // the offset from the start of the parent string and the length.  Using
     662             : // a Sliced String therefore requires unpacking of the parent string and
     663             : // adding the offset to the start address.  Substrings of a Sliced String
     664             : // are not nested since the double indirection is simplified when creating
     665             : // such a substring.
     666             : // Currently missing features are:
     667             : //  - handling externalized parent strings
     668             : //  - external strings as parent
     669             : //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
     670             : class SlicedString : public String {
     671             :  public:
     672             :   inline String parent();
     673             :   inline void set_parent(Isolate* isolate, String parent,
     674             :                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
     675             :   inline int offset() const;
     676             :   inline void set_offset(int offset);
     677             : 
     678             :   // Dispatched behavior.
     679             :   V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
     680             : 
     681             :   DECL_CAST(SlicedString)
     682             : 
     683             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     684             :                                 TORQUE_GENERATED_SLICED_STRING_FIELDS)
     685             : 
     686             :   // Minimum length for a sliced string.
     687             :   static const int kMinLength = 13;
     688             : 
     689             :   using BodyDescriptor = FixedBodyDescriptor<kParentOffset, kSize, kSize>;
     690             : 
     691             :   DECL_VERIFIER(SlicedString)
     692             : 
     693           0 :   OBJECT_CONSTRUCTORS(SlicedString, String);
     694             : };
     695             : 
     696             : // The ExternalString class describes string values that are backed by
     697             : // a string resource that lies outside the V8 heap.  ExternalStrings
     698             : // consist of the length field common to all strings and a pointer to the
     699             : // external resource.  It is important to ensure (externally) that the
     700             : // resource is not deallocated while the ExternalString is live in the
     701             : // V8 heap.
     702             : //
     703             : // The API expects that all ExternalStrings are created through the
     704             : // API.  Therefore, ExternalStrings should not be used internally.
     705             : class ExternalString : public String {
     706             :  public:
     707             :   DECL_CAST(ExternalString)
     708             : 
     709             :   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
     710             :                                 TORQUE_GENERATED_EXTERNAL_STRING_FIELDS)
     711             : 
     712             :   // Size of uncached external strings.
     713             :   static const int kUncachedSize =
     714             :       kResourceOffset + FIELD_SIZE(kResourceOffset);
     715             : 
     716             :   // Return whether the external string data pointer is not cached.
     717             :   inline bool is_uncached() const;
     718             :   // Size in bytes of the external payload.
     719             :   int ExternalPayloadSize() const;
     720             : 
     721             :   // Used in the serializer/deserializer.
     722             :   inline Address resource_as_address();
     723             :   inline void set_address_as_resource(Address address);
     724             :   inline uint32_t resource_as_uint32();
     725             :   inline void set_uint32_as_resource(uint32_t value);
     726             : 
     727             :   // Disposes string's resource object if it has not already been disposed.
     728             :   inline void DisposeResource();
     729             : 
     730             :   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
     731             : 
     732             :   OBJECT_CONSTRUCTORS(ExternalString, String);
     733             : };
     734             : 
     735             : // The ExternalOneByteString class is an external string backed by a
     736             : // one-byte string.
     737             : class ExternalOneByteString : public ExternalString {
     738             :  public:
     739             :   static const bool kHasOneByteEncoding = true;
     740             : 
     741             :   using Resource = v8::String::ExternalOneByteStringResource;
     742             : 
     743             :   // The underlying resource.
     744             :   inline const Resource* resource();
     745             : 
     746             :   // It is assumed that the previous resource is null. If it is not null, then
     747             :   // it is the responsibility of the caller to handle the previous resource.
     748             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     749             :   // Used only during serialization.
     750             :   inline void set_resource(const Resource* buffer);
     751             : 
     752             :   // Update the pointer cache to the external character array.
     753             :   // The cached pointer is always valid, as the external character array does
     754             :   // not move during lifetime.  Deserialization is the only exception, after
     755             :   // which the pointer cache has to be refreshed.
     756             :   inline void update_data_cache();
     757             : 
     758             :   inline const uint8_t* GetChars();
     759             : 
     760             :   // Dispatched behavior.
     761             :   inline uint16_t ExternalOneByteStringGet(int index);
     762             : 
     763             :   DECL_CAST(ExternalOneByteString)
     764             : 
     765             :   class BodyDescriptor;
     766             : 
     767           0 :   OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
     768             : };
     769             : 
     770             : // The ExternalTwoByteString class is an external string backed by a UTF-16
     771             : // encoded string.
     772             : class ExternalTwoByteString : public ExternalString {
     773             :  public:
     774             :   static const bool kHasOneByteEncoding = false;
     775             : 
     776             :   using Resource = v8::String::ExternalStringResource;
     777             : 
     778             :   // The underlying string resource.
     779             :   inline const Resource* resource();
     780             : 
     781             :   // It is assumed that the previous resource is null. If it is not null, then
     782             :   // it is the responsibility of the caller to handle the previous resource.
     783             :   inline void SetResource(Isolate* isolate, const Resource* buffer);
     784             :   // Used only during serialization.
     785             :   inline void set_resource(const Resource* buffer);
     786             : 
     787             :   // Update the pointer cache to the external character array.
     788             :   // The cached pointer is always valid, as the external character array does
     789             :   // not move during lifetime.  Deserialization is the only exception, after
     790             :   // which the pointer cache has to be refreshed.
     791             :   inline void update_data_cache();
     792             : 
     793             :   inline const uint16_t* GetChars();
     794             : 
     795             :   // Dispatched behavior.
     796             :   inline uint16_t ExternalTwoByteStringGet(int index);
     797             : 
     798             :   // For regexp code.
     799             :   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
     800             : 
     801             :   DECL_CAST(ExternalTwoByteString)
     802             : 
     803             :   class BodyDescriptor;
     804             : 
     805           0 :   OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
     806             : };
     807             : 
     808             : // A flat string reader provides random access to the contents of a
     809             : // string independent of the character width of the string.  The handle
     810             : // must be valid as long as the reader is being used.
     811     2990744 : class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
     812             :  public:
     813             :   FlatStringReader(Isolate* isolate, Handle<String> str);
     814             :   FlatStringReader(Isolate* isolate, Vector<const char> input);
     815             :   void PostGarbageCollection() override;
     816             :   inline uc32 Get(int index);
     817             :   template <typename Char>
     818             :   inline Char Get(int index);
     819             :   int length() { return length_; }
     820             : 
     821             :  private:
     822             :   Address* str_;
     823             :   bool is_one_byte_;
     824             :   int length_;
     825             :   const void* start_;
     826             : };
     827             : 
     828             : // This maintains an off-stack representation of the stack frames required
     829             : // to traverse a ConsString, allowing an entirely iterative and restartable
     830             : // traversal of the entire string.
     831             : class ConsStringIterator {
     832             :  public:
     833     6459412 :   inline ConsStringIterator() = default;
     834       67010 :   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
     835             :     Reset(cons_string, offset);
     836       67010 :   }
     837             :   inline void Reset(ConsString cons_string, int offset = 0) {
     838    11830687 :     depth_ = 0;
     839             :     // Next will always return String() for a null cons_string.
     840    11795045 :     if (cons_string.is_null()) return;
     841      108701 :     Initialize(cons_string, offset);
     842             :   }
     843             :   // Returns a default-constructed String() when complete.
     844             :   inline String Next(int* offset_out) {
     845   222495059 :     *offset_out = 0;
     846   222495059 :     if (depth_ == 0) return String();
     847   211935450 :     return Continue(offset_out);
     848             :   }
     849             : 
     850             :  private:
     851             :   static const int kStackSize = 32;
     852             :   // Use a mask instead of doing modulo operations for stack wrapping.
     853             :   static const int kDepthMask = kStackSize - 1;
     854             :   static_assert(base::bits::IsPowerOfTwo(kStackSize),
     855             :                 "kStackSize must be power of two");
     856             :   static inline int OffsetForDepth(int depth);
     857             : 
     858             :   inline void PushLeft(ConsString string);
     859             :   inline void PushRight(ConsString string);
     860             :   inline void AdjustMaximumDepth();
     861             :   inline void Pop();
     862   424105312 :   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
     863             :   V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
     864             :   V8_EXPORT_PRIVATE String Continue(int* offset_out);
     865             :   String NextLeaf(bool* blew_stack);
     866             :   String Search(int* offset_out);
     867             : 
     868             :   // Stack must always contain only frames for which right traversal
     869             :   // has not yet been performed.
     870             :   ConsString frames_[kStackSize];
     871             :   ConsString root_;
     872             :   int depth_;
     873             :   int maximum_depth_;
     874             :   int consumed_;
     875             :   DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
     876             : };
     877             : 
     878             : class StringCharacterStream {
     879             :  public:
     880             :   inline explicit StringCharacterStream(String string, int offset = 0);
     881             :   inline uint16_t GetNext();
     882             :   inline bool HasMore();
     883             :   inline void Reset(String string, int offset = 0);
     884             :   inline void VisitOneByteString(const uint8_t* chars, int length);
     885             :   inline void VisitTwoByteString(const uint16_t* chars, int length);
     886             : 
     887             :  private:
     888             :   ConsStringIterator iter_;
     889             :   bool is_one_byte_;
     890             :   union {
     891             :     const uint8_t* buffer8_;
     892             :     const uint16_t* buffer16_;
     893             :   };
     894             :   const uint8_t* end_;
     895             :   DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
     896             : };
     897             : 
     898             : }  // namespace internal
     899             : }  // namespace v8
     900             : 
     901             : #include "src/objects/object-macros-undef.h"
     902             : 
     903             : #endif  // V8_OBJECTS_STRING_H_

Generated by: LCOV version 1.10