LCOV - code coverage report
Current view: top level - src/inspector - string-16.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 74 217 34.1 %
Date: 2019-02-19 Functions: 23 34 67.6 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/inspector/string-16.h"
       6             : 
       7             : #include <algorithm>
       8             : #include <cctype>
       9             : #include <cstdlib>
      10             : #include <cstring>
      11             : #include <limits>
      12             : #include <string>
      13             : 
      14             : #include "src/base/platform/platform.h"
      15             : #include "src/conversions.h"
      16             : 
      17             : namespace v8_inspector {
      18             : 
      19             : namespace {
      20             : 
      21      203778 : bool isASCII(UChar c) { return !(c & ~0x7F); }
      22             : 
      23             : bool isSpaceOrNewLine(UChar c) {
      24        1045 :   return isASCII(c) && c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
      25             : }
      26             : 
      27      150878 : int64_t charactersToInteger(const UChar* characters, size_t length,
      28             :                             bool* ok = nullptr) {
      29             :   std::vector<char> buffer;
      30      150878 :   buffer.reserve(length + 1);
      31      353611 :   for (size_t i = 0; i < length; ++i) {
      32      405466 :     if (!isASCII(characters[i])) {
      33           0 :       if (ok) *ok = false;
      34             :       return 0;
      35             :     }
      36      405466 :     buffer.push_back(static_cast<char>(characters[i]));
      37             :   }
      38      301756 :   buffer.push_back('\0');
      39             : 
      40             :   char* endptr;
      41             :   int64_t result =
      42      150878 :       static_cast<int64_t>(std::strtoll(buffer.data(), &endptr, 10));
      43      150878 :   if (ok) *ok = !(*endptr);
      44      150878 :   return result;
      45             : }
      46             : 
      47             : const UChar replacementCharacter = 0xFFFD;
      48             : using UChar32 = uint32_t;
      49             : 
      50             : inline int inlineUTF8SequenceLengthNonASCII(char b0) {
      51           0 :   if ((b0 & 0xC0) != 0xC0) return 0;
      52           0 :   if ((b0 & 0xE0) == 0xC0) return 2;
      53           0 :   if ((b0 & 0xF0) == 0xE0) return 3;
      54           0 :   if ((b0 & 0xF8) == 0xF0) return 4;
      55             :   return 0;
      56             : }
      57             : 
      58           0 : inline int inlineUTF8SequenceLength(char b0) {
      59           0 :   return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
      60             : }
      61             : 
      62             : // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
      63             : // into the first byte, depending on how many bytes follow.  There are
      64             : // as many entries in this table as there are UTF-8 sequence types.
      65             : // (I.e., one byte sequence, two byte... etc.). Remember that sequences
      66             : // for *legal* UTF-8 will be 4 or fewer bytes total.
      67             : static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0,
      68             :                                                0xF0, 0xF8, 0xFC};
      69             : 
      70             : typedef enum {
      71             :   conversionOK,     // conversion successful
      72             :   sourceExhausted,  // partial character in source, but hit end
      73             :   targetExhausted,  // insuff. room in target for conversion
      74             :   sourceIllegal     // source sequence is illegal/malformed
      75             : } ConversionResult;
      76             : 
      77           0 : ConversionResult convertUTF16ToUTF8(const UChar** sourceStart,
      78             :                                     const UChar* sourceEnd, char** targetStart,
      79             :                                     char* targetEnd, bool strict) {
      80             :   ConversionResult result = conversionOK;
      81           0 :   const UChar* source = *sourceStart;
      82           0 :   char* target = *targetStart;
      83           0 :   while (source < sourceEnd) {
      84             :     UChar32 ch;
      85             :     uint32_t bytesToWrite = 0;
      86             :     const UChar32 byteMask = 0xBF;
      87             :     const UChar32 byteMark = 0x80;
      88             :     const UChar* oldSource =
      89             :         source;  // In case we have to back up because of target overflow.
      90           0 :     ch = static_cast<uint16_t>(*source++);
      91             :     // If we have a surrogate pair, convert to UChar32 first.
      92           0 :     if (ch >= 0xD800 && ch <= 0xDBFF) {
      93             :       // If the 16 bits following the high surrogate are in the source buffer...
      94           0 :       if (source < sourceEnd) {
      95           0 :         UChar32 ch2 = static_cast<uint16_t>(*source);
      96             :         // If it's a low surrogate, convert to UChar32.
      97           0 :         if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
      98           0 :           ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
      99           0 :           ++source;
     100           0 :         } else if (strict) {  // it's an unpaired high surrogate
     101             :           --source;           // return to the illegal value itself
     102             :           result = sourceIllegal;
     103             :           break;
     104             :         }
     105             :       } else {     // We don't have the 16 bits following the high surrogate.
     106             :         --source;  // return to the high surrogate
     107             :         result = sourceExhausted;
     108             :         break;
     109             :       }
     110           0 :     } else if (strict) {
     111             :       // UTF-16 surrogate values are illegal in UTF-32
     112           0 :       if (ch >= 0xDC00 && ch <= 0xDFFF) {
     113             :         --source;  // return to the illegal value itself
     114             :         result = sourceIllegal;
     115             :         break;
     116             :       }
     117             :     }
     118             :     // Figure out how many bytes the result will require
     119           0 :     if (ch < static_cast<UChar32>(0x80)) {
     120             :       bytesToWrite = 1;
     121           0 :     } else if (ch < static_cast<UChar32>(0x800)) {
     122             :       bytesToWrite = 2;
     123           0 :     } else if (ch < static_cast<UChar32>(0x10000)) {
     124             :       bytesToWrite = 3;
     125           0 :     } else if (ch < static_cast<UChar32>(0x110000)) {
     126             :       bytesToWrite = 4;
     127             :     } else {
     128             :       bytesToWrite = 3;
     129             :       ch = replacementCharacter;
     130             :     }
     131             : 
     132           0 :     target += bytesToWrite;
     133           0 :     if (target > targetEnd) {
     134             :       source = oldSource;  // Back up source pointer!
     135           0 :       target -= bytesToWrite;
     136             :       result = targetExhausted;
     137           0 :       break;
     138             :     }
     139           0 :     switch (bytesToWrite) {
     140             :       case 4:
     141           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     142           0 :         ch >>= 6;
     143             :         V8_FALLTHROUGH;
     144             :       case 3:
     145           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     146           0 :         ch >>= 6;
     147             :         V8_FALLTHROUGH;
     148             :       case 2:
     149           0 :         *--target = static_cast<char>((ch | byteMark) & byteMask);
     150           0 :         ch >>= 6;
     151             :         V8_FALLTHROUGH;
     152             :       case 1:
     153           0 :         *--target = static_cast<char>(ch | firstByteMark[bytesToWrite]);
     154             :     }
     155           0 :     target += bytesToWrite;
     156             :   }
     157           0 :   *sourceStart = source;
     158           0 :   *targetStart = target;
     159           0 :   return result;
     160             : }
     161             : 
     162             : /**
     163             :  * Is this code point a BMP code point (U+0000..U+ffff)?
     164             :  * @param c 32-bit code point
     165             :  * @return TRUE or FALSE
     166             :  * @stable ICU 2.8
     167             :  */
     168             : #define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF)
     169             : 
     170             : /**
     171             :  * Is this code point a supplementary code point (U+010000..U+10FFFF)?
     172             :  * @param c 32-bit code point
     173             :  * @return TRUE or FALSE
     174             :  * @stable ICU 2.8
     175             :  */
     176             : #define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF)
     177             : 
     178             : /**
     179             :  * Is this code point a surrogate (U+d800..U+dfff)?
     180             :  * @param c 32-bit code point
     181             :  * @return TRUE or FALSE
     182             :  * @stable ICU 2.4
     183             :  */
     184             : #define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800)
     185             : 
     186             : /**
     187             :  * Get the lead surrogate (0xD800..0xDBFF) for a
     188             :  * supplementary code point (0x010000..0x10FFFF).
     189             :  * @param supplementary 32-bit code point (U+010000..U+10FFFF)
     190             :  * @return lead surrogate (U+D800..U+DBFF) for supplementary
     191             :  * @stable ICU 2.4
     192             :  */
     193             : #define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0)
     194             : 
     195             : /**
     196             :  * Get the trail surrogate (0xDC00..0xDFFF) for a
     197             :  * supplementary code point (0x010000..0x10FFFF).
     198             :  * @param supplementary 32-bit code point (U+010000..U+10FFFF)
     199             :  * @return trail surrogate (U+DC00..U+DFFF) for supplementary
     200             :  * @stable ICU 2.4
     201             :  */
     202             : #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00)
     203             : 
     204             : // This must be called with the length pre-determined by the first byte.
     205             : // If presented with a length > 4, this returns false.  The Unicode
     206             : // definition of UTF-8 goes up to 4-byte sequences.
     207           0 : static bool isLegalUTF8(const unsigned char* source, int length) {
     208             :   unsigned char a;
     209           0 :   const unsigned char* srcptr = source + length;
     210           0 :   switch (length) {
     211             :     default:
     212             :       return false;
     213             :     // Everything else falls through when "true"...
     214             :     case 4:
     215           0 :       if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     216             :       V8_FALLTHROUGH;
     217             :     case 3:
     218           0 :       if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     219             :       V8_FALLTHROUGH;
     220             :     case 2:
     221           0 :       if ((a = (*--srcptr)) > 0xBF) return false;
     222             : 
     223             :       // no fall-through in this inner switch
     224           0 :       switch (*source) {
     225             :         case 0xE0:
     226           0 :           if (a < 0xA0) return false;
     227             :           break;
     228             :         case 0xED:
     229           0 :           if (a > 0x9F) return false;
     230             :           break;
     231             :         case 0xF0:
     232           0 :           if (a < 0x90) return false;
     233             :           break;
     234             :         case 0xF4:
     235           0 :           if (a > 0x8F) return false;
     236             :           break;
     237             :         default:
     238           0 :           if (a < 0x80) return false;
     239             :       }
     240             :       V8_FALLTHROUGH;
     241             : 
     242             :     case 1:
     243           0 :       if (*source >= 0x80 && *source < 0xC2) return false;
     244             :   }
     245           0 :   if (*source > 0xF4) return false;
     246           0 :   return true;
     247             : }
     248             : 
     249             : // Magic values subtracted from a buffer value during UTF8 conversion.
     250             : // This table contains as many values as there might be trailing bytes
     251             : // in a UTF-8 sequence.
     252             : static const UChar32 offsetsFromUTF8[6] = {0x00000000UL,
     253             :                                            0x00003080UL,
     254             :                                            0x000E2080UL,
     255             :                                            0x03C82080UL,
     256             :                                            static_cast<UChar32>(0xFA082080UL),
     257             :                                            static_cast<UChar32>(0x82082080UL)};
     258             : 
     259           0 : static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) {
     260             :   UChar32 character = 0;
     261             : 
     262             :   // The cases all fall through.
     263           0 :   switch (length) {
     264             :     case 6:
     265           0 :       character += static_cast<unsigned char>(*sequence++);
     266           0 :       character <<= 6;
     267             :       V8_FALLTHROUGH;
     268             :     case 5:
     269           0 :       character += static_cast<unsigned char>(*sequence++);
     270           0 :       character <<= 6;
     271             :       V8_FALLTHROUGH;
     272             :     case 4:
     273           0 :       character += static_cast<unsigned char>(*sequence++);
     274           0 :       character <<= 6;
     275             :       V8_FALLTHROUGH;
     276             :     case 3:
     277           0 :       character += static_cast<unsigned char>(*sequence++);
     278           0 :       character <<= 6;
     279             :       V8_FALLTHROUGH;
     280             :     case 2:
     281           0 :       character += static_cast<unsigned char>(*sequence++);
     282           0 :       character <<= 6;
     283             :       V8_FALLTHROUGH;
     284             :     case 1:
     285           0 :       character += static_cast<unsigned char>(*sequence++);
     286             :   }
     287             : 
     288           0 :   return character - offsetsFromUTF8[length - 1];
     289             : }
     290             : 
     291           0 : ConversionResult convertUTF8ToUTF16(const char** sourceStart,
     292             :                                     const char* sourceEnd, UChar** targetStart,
     293             :                                     UChar* targetEnd, bool* sourceAllASCII,
     294             :                                     bool strict) {
     295             :   ConversionResult result = conversionOK;
     296           0 :   const char* source = *sourceStart;
     297           0 :   UChar* target = *targetStart;
     298             :   UChar orAllData = 0;
     299           0 :   while (source < sourceEnd) {
     300           0 :     int utf8SequenceLength = inlineUTF8SequenceLength(*source);
     301           0 :     if (sourceEnd - source < utf8SequenceLength) {
     302             :       result = sourceExhausted;
     303             :       break;
     304             :     }
     305             :     // Do this check whether lenient or strict
     306           0 :     if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source),
     307           0 :                      utf8SequenceLength)) {
     308             :       result = sourceIllegal;
     309             :       break;
     310             :     }
     311             : 
     312           0 :     UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
     313             : 
     314           0 :     if (target >= targetEnd) {
     315           0 :       source -= utf8SequenceLength;  // Back up source pointer!
     316             :       result = targetExhausted;
     317           0 :       break;
     318             :     }
     319             : 
     320           0 :     if (U_IS_BMP(character)) {
     321             :       // UTF-16 surrogate values are illegal in UTF-32
     322           0 :       if (U_IS_SURROGATE(character)) {
     323           0 :         if (strict) {
     324           0 :           source -= utf8SequenceLength;  // return to the illegal value itself
     325             :           result = sourceIllegal;
     326           0 :           break;
     327             :         }
     328           0 :         *target++ = replacementCharacter;
     329           0 :         orAllData |= replacementCharacter;
     330             :       } else {
     331           0 :         *target++ = static_cast<UChar>(character);  // normal case
     332           0 :         orAllData |= character;
     333             :       }
     334           0 :     } else if (U_IS_SUPPLEMENTARY(character)) {
     335             :       // target is a character in range 0xFFFF - 0x10FFFF
     336           0 :       if (target + 1 >= targetEnd) {
     337           0 :         source -= utf8SequenceLength;  // Back up source pointer!
     338             :         result = targetExhausted;
     339           0 :         break;
     340             :       }
     341           0 :       *target++ = U16_LEAD(character);
     342           0 :       *target++ = U16_TRAIL(character);
     343             :       orAllData = 0xFFFF;
     344             :     } else {
     345           0 :       if (strict) {
     346           0 :         source -= utf8SequenceLength;  // return to the start
     347             :         result = sourceIllegal;
     348           0 :         break;  // Bail out; shouldn't continue
     349             :       } else {
     350           0 :         *target++ = replacementCharacter;
     351           0 :         orAllData |= replacementCharacter;
     352             :       }
     353             :     }
     354             :   }
     355           0 :   *sourceStart = source;
     356           0 :   *targetStart = target;
     357             : 
     358           0 :   if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F);
     359             : 
     360           0 :   return result;
     361             : }
     362             : 
     363             : // Helper to write a three-byte UTF-8 code point to the buffer, caller must
     364             : // check room is available.
     365           0 : static inline void putUTF8Triple(char*& buffer, UChar ch) {
     366           0 :   *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
     367           0 :   *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
     368           0 :   *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
     369           0 : }
     370             : 
     371             : }  // namespace
     372             : 
     373     9425167 : String16::String16(const UChar* characters, size_t size)
     374    14225821 :     : m_impl(characters, size) {}
     375             : 
     376           0 : String16::String16(const UChar* characters) : m_impl(characters) {}
     377             : 
     378   104571274 : String16::String16(const char* characters)
     379   104571274 :     : String16(characters, std::strlen(characters)) {}
     380             : 
     381   209146530 : String16::String16(const char* characters, size_t size) {
     382   104573265 :   m_impl.resize(size);
     383   675415700 :   for (size_t i = 0; i < size; ++i) m_impl[i] = characters[i];
     384   104573265 : }
     385             : 
     386     1529370 : String16::String16(const std::basic_string<UChar>& impl) : m_impl(impl) {}
     387             : 
     388             : // static
     389     8403714 : String16 String16::fromInteger(int number) {
     390             :   char arr[50];
     391             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     392     8403714 :   return String16(IntToCString(number, buffer));
     393             : }
     394             : 
     395             : // static
     396       63526 : String16 String16::fromInteger(size_t number) {
     397             :   const size_t kBufferSize = 50;
     398             :   char buffer[kBufferSize];
     399             : #if !defined(_WIN32) && !defined(_WIN64)
     400       63526 :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     401             : #else
     402             :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     403             : #endif
     404       63526 :   return String16(buffer);
     405             : }
     406             : 
     407             : // static
     408      297487 : String16 String16::fromDouble(double number) {
     409             :   char arr[50];
     410             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     411      297487 :   return String16(DoubleToCString(number, buffer));
     412             : }
     413             : 
     414             : // static
     415           5 : String16 String16::fromDouble(double number, int precision) {
     416             :   std::unique_ptr<char[]> str(
     417           5 :       v8::internal::DoubleToPrecisionCString(number, precision));
     418          10 :   return String16(str.get());
     419             : }
     420             : 
     421         130 : int64_t String16::toInteger64(bool* ok) const {
     422      150878 :   return charactersToInteger(characters16(), length(), ok);
     423             : }
     424             : 
     425      150748 : int String16::toInteger(bool* ok) const {
     426             :   int64_t result = toInteger64(ok);
     427      150748 :   if (ok && *ok) {
     428      147201 :     *ok = result <= std::numeric_limits<int>::max() &&
     429      147201 :           result >= std::numeric_limits<int>::min();
     430             :   }
     431      150748 :   return static_cast<int>(result);
     432             : }
     433             : 
     434         260 : String16 String16::stripWhiteSpace() const {
     435         260 :   if (!length()) return String16();
     436             : 
     437             :   size_t start = 0;
     438         255 :   size_t end = length() - 1;
     439             : 
     440             :   // skip white space from start
     441         915 :   while (start <= end && isSpaceOrNewLine(characters16()[start])) ++start;
     442             : 
     443             :   // only white space
     444         255 :   if (start > end) return String16();
     445             : 
     446             :   // skip white space from end
     447         525 :   while (end && isSpaceOrNewLine(characters16()[end])) --end;
     448             : 
     449         250 :   if (!start && end == length() - 1) return *this;
     450         110 :   return String16(characters16() + start, end + 1 - start);
     451             : }
     452             : 
     453             : String16Builder::String16Builder() = default;
     454             : 
     455    56772905 : void String16Builder::append(const String16& s) {
     456             :   m_buffer.insert(m_buffer.end(), s.characters16(),
     457   113545810 :                   s.characters16() + s.length());
     458    56772905 : }
     459             : 
     460  1292468023 : void String16Builder::append(UChar c) { m_buffer.push_back(c); }
     461             : 
     462    99781168 : void String16Builder::append(char c) {
     463    99781168 :   UChar u = c;
     464    99781168 :   m_buffer.push_back(u);
     465    99781168 : }
     466             : 
     467           0 : void String16Builder::append(const UChar* characters, size_t length) {
     468           0 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     469           0 : }
     470             : 
     471    10239025 : void String16Builder::append(const char* characters, size_t length) {
     472    20478050 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     473    10239025 : }
     474             : 
     475        6775 : void String16Builder::appendNumber(int number) {
     476             :   constexpr int kBufferSize = 11;
     477             :   char buffer[kBufferSize];
     478        6775 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%d", number);
     479             :   DCHECK_LE(0, chars);
     480       13550 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     481        6775 : }
     482             : 
     483         235 : void String16Builder::appendNumber(size_t number) {
     484             :   constexpr int kBufferSize = 20;
     485             :   char buffer[kBufferSize];
     486             : #if !defined(_WIN32) && !defined(_WIN64)
     487         235 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     488             : #else
     489             :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     490             : #endif
     491             :   DCHECK_LE(0, chars);
     492         470 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     493         235 : }
     494             : 
     495           0 : void String16Builder::appendUnsignedAsHex(uint64_t number) {
     496             :   constexpr int kBufferSize = 17;
     497             :   char buffer[kBufferSize];
     498             :   int chars =
     499           0 :       v8::base::OS::SNPrintF(buffer, kBufferSize, "%016" PRIx64, number);
     500             :   DCHECK_LE(0, chars);
     501           0 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     502           0 : }
     503             : 
     504      302299 : void String16Builder::appendUnsignedAsHex(uint32_t number) {
     505             :   constexpr int kBufferSize = 9;
     506             :   char buffer[kBufferSize];
     507      302299 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%08" PRIx32, number);
     508             :   DCHECK_LE(0, chars);
     509      604598 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     510      302299 : }
     511             : 
     512     4800544 : String16 String16Builder::toString() {
     513     9601088 :   return String16(m_buffer.data(), m_buffer.size());
     514             : }
     515             : 
     516     1860960 : void String16Builder::reserveCapacity(size_t capacity) {
     517     1860960 :   m_buffer.reserve(capacity);
     518     1860960 : }
     519             : 
     520           0 : String16 String16::fromUTF8(const char* stringStart, size_t length) {
     521           0 :   if (!stringStart || !length) return String16();
     522             : 
     523           0 :   std::vector<UChar> buffer(length);
     524             :   UChar* bufferStart = buffer.data();
     525             : 
     526           0 :   UChar* bufferCurrent = bufferStart;
     527           0 :   const char* stringCurrent = stringStart;
     528           0 :   if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
     529           0 :                          bufferCurrent + buffer.size(), nullptr,
     530           0 :                          true) != conversionOK)
     531           0 :     return String16();
     532             : 
     533           0 :   size_t utf16Length = bufferCurrent - bufferStart;
     534             :   return String16(bufferStart, utf16Length);
     535             : }
     536             : 
     537           0 : std::string String16::utf8() const {
     538             :   size_t length = this->length();
     539             : 
     540           0 :   if (!length) return std::string("");
     541             : 
     542             :   // Allocate a buffer big enough to hold all the characters
     543             :   // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
     544             :   // Optimization ideas, if we find this function is hot:
     545             :   //  * We could speculatively create a CStringBuffer to contain 'length'
     546             :   //    characters, and resize if necessary (i.e. if the buffer contains
     547             :   //    non-ascii characters). (Alternatively, scan the buffer first for
     548             :   //    ascii characters, so we know this will be sufficient).
     549             :   //  * We could allocate a CStringBuffer with an appropriate size to
     550             :   //    have a good chance of being able to write the string into the
     551             :   //    buffer without reallocing (say, 1.5 x length).
     552           0 :   if (length > std::numeric_limits<unsigned>::max() / 3) return std::string();
     553             : 
     554           0 :   std::string output(length * 3, '\0');
     555           0 :   const UChar* characters = m_impl.data();
     556           0 :   const UChar* characters_end = characters + length;
     557           0 :   char* buffer = &*output.begin();
     558           0 :   char* buffer_end = &*output.end();
     559           0 :   while (characters < characters_end) {
     560             :     // Use strict conversion to detect unpaired surrogates.
     561             :     ConversionResult result = convertUTF16ToUTF8(
     562           0 :         &characters, characters_end, &buffer, buffer_end, /* strict= */ true);
     563             :     DCHECK_NE(result, targetExhausted);
     564             :     // Conversion fails when there is an unpaired surrogate.  Put
     565             :     // replacement character (U+FFFD) instead of the unpaired
     566             :     // surrogate.
     567           0 :     if (result != conversionOK) {
     568             :       DCHECK_LE(0xD800, *characters);
     569             :       DCHECK_LE(*characters, 0xDFFF);
     570             :       // There should be room left, since one UChar hasn't been
     571             :       // converted.
     572             :       DCHECK_LE(buffer + 3, buffer_end);
     573           0 :       putUTF8Triple(buffer, replacementCharacter);
     574           0 :       ++characters;
     575             :     }
     576             :   }
     577             : 
     578           0 :   output.resize(buffer - output.data());
     579             :   return output;
     580             : }
     581             : 
     582      178779 : }  // namespace v8_inspector

Generated by: LCOV version 1.10