LCOV - app.info - src/inspector/string-16.cc

LCOV - code coverage report

Current view:	top level - src/inspector - string-16.cc (source / functions)		Hit	Total	Coverage
Test:	app.info	Lines:	73	208	35.1 %
Date:	2019-04-19	Functions:	22	31	71.0 %

          Line data    Source code

       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/inspector/string-16.h"
       6             : 
       7             : #include <algorithm>
       8             : #include <cctype>
       9             : #include <cstdlib>
      10             : #include <cstring>
      11             : #include <limits>
      12             : #include <string>
      13             : 
      14             : #include "src/base/platform/platform.h"
      15             : #include "src/base/v8-fallthrough.h"
      16             : #include "src/conversions.h"
      17             : 
      18             : namespace v8_inspector {
      19             : 
      20             : namespace {
      21             : 
      22      204375 : bool isASCII(UChar c) { return !(c & ~0x7F); }
      23             : 
      24             : bool isSpaceOrNewLine(UChar c) {
      25        1015 :   return isASCII(c) && c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
      26             : }
      27             : 
      28      151495 : int64_t charactersToInteger(const UChar* characters, size_t length,
      29             :                             bool* ok = nullptr) {
      30             :   std::vector<char> buffer;
      31      151495 :   buffer.reserve(length + 1);
      32      558215 :   for (size_t i = 0; i < length; ++i) {
      33      406720 :     if (!isASCII(characters[i])) {
      34           0 :       if (ok) *ok = false;
      35             :       return 0;
      36             :     }
      37      406720 :     buffer.push_back(static_cast<char>(characters[i]));
      38             :   }
      39      302990 :   buffer.push_back('\0');
      40             : 
      41             :   char* endptr;
      42             :   int64_t result =
      43      151495 :       static_cast<int64_t>(std::strtoll(buffer.data(), &endptr, 10));
      44      151495 :   if (ok) *ok = !(*endptr);
      45             :   return result;
      46             : }
      47             : 
      48             : const UChar replacementCharacter = 0xFFFD;
      49             : using UChar32 = uint32_t;
      50             : 
      51             : inline int inlineUTF8SequenceLengthNonASCII(char b0) {
      52           0 :   if ((b0 & 0xC0) != 0xC0) return 0;
      53           0 :   if ((b0 & 0xE0) == 0xC0) return 2;
      54           0 :   if ((b0 & 0xF0) == 0xE0) return 3;
      55           0 :   if ((b0 & 0xF8) == 0xF0) return 4;
      56             :   return 0;
      57             : }
      58             : 
      59             : inline int inlineUTF8SequenceLength(char b0) {
      60           0 :   return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
      61             : }
      62             : 
      63             : // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
      64             : // into the first byte, depending on how many bytes follow.  There are
      65             : // as many entries in this table as there are UTF-8 sequence types.
      66             : // (I.e., one byte sequence, two byte... etc.). Remember that sequences
      67             : // for *legal* UTF-8 will be 4 or fewer bytes total.
      68             : static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0,
      69             :                                                0xF0, 0xF8, 0xFC};
      70             : 
      71             : typedef enum {
      72             :   conversionOK,     // conversion successful
      73             :   sourceExhausted,  // partial character in source, but hit end
      74             :   targetExhausted,  // insuff. room in target for conversion
      75             :   sourceIllegal     // source sequence is illegal/malformed
      76             : } ConversionResult;
      77             : 
      78           0 : ConversionResult convertUTF16ToUTF8(const UChar** sourceStart,
      79             :                                     const UChar* sourceEnd, char** targetStart,
      80             :                                     char* targetEnd, bool strict) {
      81             :   ConversionResult result = conversionOK;
      82           0 :   const UChar* source = *sourceStart;
      83           0 :   char* target = *targetStart;
      84           0 :   while (source < sourceEnd) {
      85             :     UChar32 ch;
      86             :     uint32_t bytesToWrite = 0;
      87             :     const UChar32 byteMask = 0xBF;
      88             :     const UChar32 byteMark = 0x80;
      89             :     const UChar* oldSource =
      90             :         source;  // In case we have to back up because of target overflow.
      91           0 :     ch = static_cast<uint16_t>(*source++);
      92             :     // If we have a surrogate pair, convert to UChar32 first.
      93           0 :     if (ch >= 0xD800 && ch <= 0xDBFF) {
      94             :       // If the 16 bits following the high surrogate are in the source buffer...
      95           0 :       if (source < sourceEnd) {
      96           0 :         UChar32 ch2 = static_cast<uint16_t>(*source);
      97             :         // If it's a low surrogate, convert to UChar32.
      98           0 :         if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
      99           0 :           ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
     100           0 :           ++source;
     101           0 :         } else if (strict) {  // it's an unpaired high surrogate
     102             :           --source;           // return to the illegal value itself
     103             :           result = sourceIllegal;
     104             :           break;
     105             :         }
     106             :       } else {     // We don't have the 16 bits following the high surrogate.
     107             :         --source;  // return to the high surrogate
     108             :         result = sourceExhausted;
     109             :         break;
     110             :       }
     111           0 :     } else if (strict) {
     112             :       // UTF-16 surrogate values are illegal in UTF-32
     113           0 :       if (ch >= 0xDC00 && ch <= 0xDFFF) {
     114             :         --source;  // return to the illegal value itself
     115             :         result = sourceIllegal;
     116             :         break;
     117             :       }
     118             :     }
     119             :     // Figure out how many bytes the result will require
     120           0 :     if (ch < static_cast<UChar32>(0x80)) {
     121             :       bytesToWrite = 1;
     122           0 :     } else if (ch < static_cast<UChar32>(0x800)) {
     123             :       bytesToWrite = 2;
     124           0 :     } else if (ch < static_cast<UChar32>(0x10000)) {
     125             :       bytesToWrite = 3;
     126           0 :     } else if (ch < static_cast<UChar32>(0x110000)) {
     127             :       bytesToWrite = 4;
     128             :     } else {
     129             :       bytesToWrite = 3;
     130             :       ch = replacementCharacter;
     131             :     }
     132             : 
     133           0 :     target += bytesToWrite;
     134           0 :     if (target > targetEnd) {
     135             :       source = oldSource;  // Back up source pointer!
     136           0 :       target -= bytesToWrite;
     137             :       result = targetExhausted;
     138           0 :       break;
     139             :     }
     140           0 :     switch (bytesToWrite) {
     141             :       case 4:
     142           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     143           0 :         ch >>= 6;
     144             :         V8_FALLTHROUGH;
     145             :       case 3:
     146           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     147           0 :         ch >>= 6;
     148             :         V8_FALLTHROUGH;
     149             :       case 2:
     150           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     151           0 :         ch >>= 6;
     152             :         V8_FALLTHROUGH;
     153             :       case 1:
     154           0 :         *--target = static_cast<char>(ch | firstByteMark[bytesToWrite]);
     155             :     }
     156           0 :     target += bytesToWrite;
     157             :   }
     158           0 :   *sourceStart = source;
     159           0 :   *targetStart = target;
     160           0 :   return result;
     161             : }
     162             : 
     163             : /**
     164             :  * Is this code point a BMP code point (U+0000..U+ffff)?
     165             :  * @param c 32-bit code point
     166             :  * @return TRUE or FALSE
     167             :  * @stable ICU 2.8
     168             :  */
     169             : #define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF)
     170             : 
     171             : /**
     172             :  * Is this code point a supplementary code point (U+010000..U+10FFFF)?
     173             :  * @param c 32-bit code point
     174             :  * @return TRUE or FALSE
     175             :  * @stable ICU 2.8
     176             :  */
     177             : #define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF)
     178             : 
     179             : /**
     180             :  * Is this code point a surrogate (U+d800..U+dfff)?
     181             :  * @param c 32-bit code point
     182             :  * @return TRUE or FALSE
     183             :  * @stable ICU 2.4
     184             :  */
     185             : #define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800)
     186             : 
     187             : /**
     188             :  * Get the lead surrogate (0xD800..0xDBFF) for a
     189             :  * supplementary code point (0x010000..0x10FFFF).
     190             :  * @param supplementary 32-bit code point (U+010000..U+10FFFF)
     191             :  * @return lead surrogate (U+D800..U+DBFF) for supplementary
     192             :  * @stable ICU 2.4
     193             :  */
     194             : #define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0)
     195             : 
     196             : /**
     197             :  * Get the trail surrogate (0xDC00..0xDFFF) for a
     198             :  * supplementary code point (0x010000..0x10FFFF).
     199             :  * @param supplementary 32-bit code point (U+010000..U+10FFFF)
     200             :  * @return trail surrogate (U+DC00..U+DFFF) for supplementary
     201             :  * @stable ICU 2.4
     202             :  */
     203             : #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00)
     204             : 
     205             : // This must be called with the length pre-determined by the first byte.
     206             : // If presented with a length > 4, this returns false.  The Unicode
     207             : // definition of UTF-8 goes up to 4-byte sequences.
     208           0 : static bool isLegalUTF8(const unsigned char* source, int length) {
     209             :   unsigned char a;
     210           0 :   const unsigned char* srcptr = source + length;
     211           0 :   switch (length) {
     212             :     default:
     213             :       return false;
     214             :     // Everything else falls through when "true"...
     215             :     case 4:
     216           0 :       if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     217             :       V8_FALLTHROUGH;
     218             :     case 3:
     219           0 :       if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     220             :       V8_FALLTHROUGH;
     221             :     case 2:
     222           0 :       if ((a = (*--srcptr)) > 0xBF) return false;
     223             : 
     224             :       // no fall-through in this inner switch
     225           0 :       switch (*source) {
     226             :         case 0xE0:
     227           0 :           if (a < 0xA0) return false;
     228             :           break;
     229             :         case 0xED:
     230           0 :           if (a > 0x9F) return false;
     231             :           break;
     232             :         case 0xF0:
     233           0 :           if (a < 0x90) return false;
     234             :           break;
     235             :         case 0xF4:
     236           0 :           if (a > 0x8F) return false;
     237             :           break;
     238             :         default:
     239           0 :           if (a < 0x80) return false;
     240             :       }
     241             :       V8_FALLTHROUGH;
     242             : 
     243             :     case 1:
     244           0 :       if (*source >= 0x80 && *source < 0xC2) return false;
     245             :   }
     246           0 :   if (*source > 0xF4) return false;
     247           0 :   return true;
     248             : }
     249             : 
     250             : // Magic values subtracted from a buffer value during UTF8 conversion.
     251             : // This table contains as many values as there might be trailing bytes
     252             : // in a UTF-8 sequence.
     253             : static const UChar32 offsetsFromUTF8[6] = {0x00000000UL,
     254             :                                            0x00003080UL,
     255             :                                            0x000E2080UL,
     256             :                                            0x03C82080UL,
     257             :                                            static_cast<UChar32>(0xFA082080UL),
     258             :                                            static_cast<UChar32>(0x82082080UL)};
     259             : 
     260           0 : static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) {
     261             :   UChar32 character = 0;
     262             : 
     263             :   // The cases all fall through.
     264           0 :   switch (length) {
     265             :     case 6:
     266           0 :       character += static_cast<unsigned char>(*sequence++);
     267           0 :       character <<= 6;
     268             :       V8_FALLTHROUGH;
     269             :     case 5:
     270           0 :       character += static_cast<unsigned char>(*sequence++);
     271           0 :       character <<= 6;
     272             :       V8_FALLTHROUGH;
     273             :     case 4:
     274           0 :       character += static_cast<unsigned char>(*sequence++);
     275           0 :       character <<= 6;
     276             :       V8_FALLTHROUGH;
     277             :     case 3:
     278           0 :       character += static_cast<unsigned char>(*sequence++);
     279           0 :       character <<= 6;
     280             :       V8_FALLTHROUGH;
     281             :     case 2:
     282           0 :       character += static_cast<unsigned char>(*sequence++);
     283           0 :       character <<= 6;
     284             :       V8_FALLTHROUGH;
     285             :     case 1:
     286           0 :       character += static_cast<unsigned char>(*sequence++);
     287             :   }
     288             : 
     289           0 :   return character - offsetsFromUTF8[length - 1];
     290             : }
     291             : 
     292           0 : ConversionResult convertUTF8ToUTF16(const char** sourceStart,
     293             :                                     const char* sourceEnd, UChar** targetStart,
     294             :                                     UChar* targetEnd, bool* sourceAllASCII,
     295             :                                     bool strict) {
     296             :   ConversionResult result = conversionOK;
     297           0 :   const char* source = *sourceStart;
     298           0 :   UChar* target = *targetStart;
     299             :   UChar orAllData = 0;
     300           0 :   while (source < sourceEnd) {
     301           0 :     int utf8SequenceLength = inlineUTF8SequenceLength(*source);
     302           0 :     if (sourceEnd - source < utf8SequenceLength) {
     303             :       result = sourceExhausted;
     304             :       break;
     305             :     }
     306             :     // Do this check whether lenient or strict
     307           0 :     if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source),
     308             :                      utf8SequenceLength)) {
     309             :       result = sourceIllegal;
     310             :       break;
     311             :     }
     312             : 
     313           0 :     UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
     314             : 
     315           0 :     if (target >= targetEnd) {
     316           0 :       source -= utf8SequenceLength;  // Back up source pointer!
     317             :       result = targetExhausted;
     318           0 :       break;
     319             :     }
     320             : 
     321           0 :     if (U_IS_BMP(character)) {
     322             :       // UTF-16 surrogate values are illegal in UTF-32
     323           0 :       if (U_IS_SURROGATE(character)) {
     324           0 :         if (strict) {
     325           0 :           source -= utf8SequenceLength;  // return to the illegal value itself
     326             :           result = sourceIllegal;
     327           0 :           break;
     328             :         }
     329           0 :         *target++ = replacementCharacter;
     330           0 :         orAllData |= replacementCharacter;
     331             :       } else {
     332           0 :         *target++ = static_cast<UChar>(character);  // normal case
     333           0 :         orAllData |= character;
     334             :       }
     335           0 :     } else if (U_IS_SUPPLEMENTARY(character)) {
     336             :       // target is a character in range 0xFFFF - 0x10FFFF
     337           0 :       if (target + 1 >= targetEnd) {
     338           0 :         source -= utf8SequenceLength;  // Back up source pointer!
     339             :         result = targetExhausted;
     340           0 :         break;
     341             :       }
     342           0 :       *target++ = U16_LEAD(character);
     343           0 :       *target++ = U16_TRAIL(character);
     344             :       orAllData = 0xFFFF;
     345             :     } else {
     346           0 :       if (strict) {
     347           0 :         source -= utf8SequenceLength;  // return to the start
     348             :         result = sourceIllegal;
     349           0 :         break;  // Bail out; shouldn't continue
     350             :       } else {
     351           0 :         *target++ = replacementCharacter;
     352           0 :         orAllData |= replacementCharacter;
     353             :       }
     354             :     }
     355             :   }
     356           0 :   *sourceStart = source;
     357           0 :   *targetStart = target;
     358             : 
     359           0 :   if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F);
     360             : 
     361           0 :   return result;
     362             : }
     363             : 
     364             : // Helper to write a three-byte UTF-8 code point to the buffer, caller must
     365             : // check room is available.
     366             : static inline void putUTF8Triple(char*& buffer, UChar ch) {
     367           0 :   *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
     368           0 :   *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
     369           0 :   *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
     370             : }
     371             : 
     372             : }  // namespace
     373             : 
     374     9388042 : String16::String16(const UChar* characters, size_t size)
     375    28145942 :     : m_impl(characters, size) {}
     376             : 
     377           0 : String16::String16(const UChar* characters) : m_impl(characters) {}
     378             : 
     379    97931964 : String16::String16(const char* characters)
     380   106654731 :     : String16(characters, std::strlen(characters)) {}
     381             : 
     382   213313780 : String16::String16(const char* characters, size_t size) {
     383   106656890 :   m_impl.resize(size);
     384  1255673240 :   for (size_t i = 0; i < size; ++i) m_impl[i] = characters[i];
     385   106656890 : }
     386             : 
     387     1506558 : String16::String16(const std::basic_string<UChar>& impl) : m_impl(impl) {}
     388             : 
     389             : // static
     390     8360709 : String16 String16::fromInteger(int number) {
     391             :   char arr[50];
     392             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     393    16721418 :   return String16(IntToCString(number, buffer));
     394             : }
     395             : 
     396             : // static
     397       63591 : String16 String16::fromInteger(size_t number) {
     398             :   const size_t kBufferSize = 50;
     399             :   char buffer[kBufferSize];
     400             : #if !defined(_WIN32) && !defined(_WIN64)
     401       63591 :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     402             : #else
     403             :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     404             : #endif
     405       63591 :   return String16(buffer);
     406             : }
     407             : 
     408             : // static
     409      298462 : String16 String16::fromDouble(double number) {
     410             :   char arr[50];
     411             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     412      596924 :   return String16(DoubleToCString(number, buffer));
     413             : }
     414             : 
     415             : // static
     416           5 : String16 String16::fromDouble(double number, int precision) {
     417             :   std::unique_ptr<char[]> str(
     418           5 :       v8::internal::DoubleToPrecisionCString(number, precision));
     419           5 :   return String16(str.get());
     420             : }
     421             : 
     422         130 : int64_t String16::toInteger64(bool* ok) const {
     423      151495 :   return charactersToInteger(characters16(), length(), ok);
     424             : }
     425             : 
     426      151365 : int String16::toInteger(bool* ok) const {
     427             :   int64_t result = toInteger64(ok);
     428      151365 :   if (ok && *ok) {
     429      147767 :     *ok = result <= std::numeric_limits<int>::max() &&
     430      147767 :           result >= std::numeric_limits<int>::min();
     431             :   }
     432      151365 :   return static_cast<int>(result);
     433             : }
     434             : 
     435         245 : String16 String16::stripWhiteSpace() const {
     436         245 :   if (!length()) return String16();
     437             : 
     438             :   size_t start = 0;
     439         240 :   size_t end = length() - 1;
     440             : 
     441             :   // skip white space from start
     442        1295 :   while (start <= end && isSpaceOrNewLine(characters16()[start])) ++start;
     443             : 
     444             :   // only white space
     445         240 :   if (start > end) return String16();
     446             : 
     447             :   // skip white space from end
     448         745 :   while (end && isSpaceOrNewLine(characters16()[end])) --end;
     449             : 
     450         235 :   if (!start && end == length() - 1) return *this;
     451         110 :   return String16(characters16() + start, end + 1 - start);
     452             : }
     453             : 
     454             : String16Builder::String16Builder() = default;
     455             : 
     456    59000754 : void String16Builder::append(const String16& s) {
     457             :   m_buffer.insert(m_buffer.end(), s.characters16(),
     458   118001508 :                   s.characters16() + s.length());
     459    59000754 : }
     460             : 
     461  1299956842 : void String16Builder::append(UChar c) { m_buffer.push_back(c); }
     462             : 
     463    99521658 : void String16Builder::append(char c) {
     464    99521658 :   UChar u = c;
     465    99521658 :   m_buffer.push_back(u);
     466    99521658 : }
     467             : 
     468           0 : void String16Builder::append(const UChar* characters, size_t length) {
     469           0 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     470           0 : }
     471             : 
     472    10232306 : void String16Builder::append(const char* characters, size_t length) {
     473    20464612 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     474    10232306 : }
     475             : 
     476        6867 : void String16Builder::appendNumber(int number) {
     477             :   constexpr int kBufferSize = 11;
     478             :   char buffer[kBufferSize];
     479        6867 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%d", number);
     480             :   DCHECK_LE(0, chars);
     481       13734 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     482        6867 : }
     483             : 
     484         250 : void String16Builder::appendNumber(size_t number) {
     485             :   constexpr int kBufferSize = 20;
     486             :   char buffer[kBufferSize];
     487             : #if !defined(_WIN32) && !defined(_WIN64)
     488         250 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     489             : #else
     490             :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     491             : #endif
     492             :   DCHECK_LE(0, chars);
     493         500 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     494         250 : }
     495             : 
     496           0 : void String16Builder::appendUnsignedAsHex(uint64_t number) {
     497             :   constexpr int kBufferSize = 17;
     498             :   char buffer[kBufferSize];
     499             :   int chars =
     500           0 :       v8::base::OS::SNPrintF(buffer, kBufferSize, "%016" PRIx64, number);
     501             :   DCHECK_LE(0, chars);
     502           0 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     503           0 : }
     504             : 
     505      280102 : void String16Builder::appendUnsignedAsHex(uint32_t number) {
     506             :   constexpr int kBufferSize = 9;
     507             :   char buffer[kBufferSize];
     508      280102 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%08" PRIx32, number);
     509             :   DCHECK_LE(0, chars);
     510      560204 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     511      280102 : }
     512             : 
     513     4684819 : String16 String16Builder::toString() {
     514     4684819 :   return String16(m_buffer.data(), m_buffer.size());
     515             : }
     516             : 
     517     1747079 : void String16Builder::reserveCapacity(size_t capacity) {
     518     1747079 :   m_buffer.reserve(capacity);
     519     1747079 : }
     520             : 
     521           0 : String16 String16::fromUTF8(const char* stringStart, size_t length) {
     522           0 :   if (!stringStart || !length) return String16();
     523             : 
     524           0 :   std::vector<UChar> buffer(length);
     525             :   UChar* bufferStart = buffer.data();
     526             : 
     527           0 :   UChar* bufferCurrent = bufferStart;
     528           0 :   const char* stringCurrent = stringStart;
     529           0 :   if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
     530             :                          bufferCurrent + buffer.size(), nullptr,
     531             :                          true) != conversionOK)
     532           0 :     return String16();
     533             : 
     534           0 :   size_t utf16Length = bufferCurrent - bufferStart;
     535             :   return String16(bufferStart, utf16Length);
     536             : }
     537             : 
     538           0 : std::string String16::utf8() const {
     539             :   size_t length = this->length();
     540             : 
     541           0 :   if (!length) return std::string("");
     542             : 
     543             :   // Allocate a buffer big enough to hold all the characters
     544             :   // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
     545             :   // Optimization ideas, if we find this function is hot:
     546             :   //  * We could speculatively create a CStringBuffer to contain 'length'
     547             :   //    characters, and resize if necessary (i.e. if the buffer contains
     548             :   //    non-ascii characters). (Alternatively, scan the buffer first for
     549             :   //    ascii characters, so we know this will be sufficient).
     550             :   //  * We could allocate a CStringBuffer with an appropriate size to
     551             :   //    have a good chance of being able to write the string into the
     552             :   //    buffer without reallocing (say, 1.5 x length).
     553           0 :   if (length > std::numeric_limits<unsigned>::max() / 3) return std::string();
     554             : 
     555           0 :   std::string output(length * 3, '\0');
     556           0 :   const UChar* characters = m_impl.data();
     557           0 :   const UChar* characters_end = characters + length;
     558           0 :   char* buffer = &*output.begin();
     559             :   char* buffer_end = &*output.end();
     560           0 :   while (characters < characters_end) {
     561             :     // Use strict conversion to detect unpaired surrogates.
     562             :     ConversionResult result = convertUTF16ToUTF8(
     563           0 :         &characters, characters_end, &buffer, buffer_end, /* strict= */ true);
     564             :     DCHECK_NE(result, targetExhausted);
     565             :     // Conversion fails when there is an unpaired surrogate.  Put
     566             :     // replacement character (U+FFFD) instead of the unpaired
     567             :     // surrogate.
     568           0 :     if (result != conversionOK) {
     569             :       DCHECK_LE(0xD800, *characters);
     570             :       DCHECK_LE(*characters, 0xDFFF);
     571             :       // There should be room left, since one UChar hasn't been
     572             :       // converted.
     573             :       DCHECK_LE(buffer + 3, buffer_end);
     574             :       putUTF8Triple(buffer, replacementCharacter);
     575           0 :       ++characters;
     576             :     }
     577             :   }
     578             : 
     579           0 :   output.resize(buffer - output.data());
     580             :   return output;
     581             : }
     582             : 
     583      122036 : }  // namespace v8_inspector

Generated by: LCOV version 1.10