LCOV - code coverage report
Current view: top level - src/inspector - string-16.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 79 217 36.4 %
Date: 2019-01-20 Functions: 25 36 69.4 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
#include "src/inspector/string-16.h"

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>

#include "src/base/platform/platform.h"
#include "src/conversions.h"
      16             : 
      17             : namespace v8_inspector {
      18             : 
      19             : namespace {
      20             : 
      21      204339 : bool isASCII(UChar c) { return !(c & ~0x7F); }
      22             : 
      23             : bool isSpaceOrNewLine(UChar c) {
      24        1045 :   return isASCII(c) && c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
      25             : }
      26             : 
      27      151403 : int64_t charactersToInteger(const UChar* characters, size_t length,
      28             :                             bool* ok = nullptr) {
      29             :   std::vector<char> buffer;
      30      151403 :   buffer.reserve(length + 1);
      31      354697 :   for (size_t i = 0; i < length; ++i) {
      32      406588 :     if (!isASCII(characters[i])) {
      33           0 :       if (ok) *ok = false;
      34             :       return 0;
      35             :     }
      36      406588 :     buffer.push_back(static_cast<char>(characters[i]));
      37             :   }
      38      302806 :   buffer.push_back('\0');
      39             : 
      40             :   char* endptr;
      41             :   int64_t result =
      42      151403 :       static_cast<int64_t>(std::strtoll(buffer.data(), &endptr, 10));
      43      151403 :   if (ok) *ok = !(*endptr);
      44      151403 :   return result;
      45             : }
      46             : 
// U+FFFD; written in place of a code point that cannot be converted.
const UChar replacementCharacter = 0xFFFD;
// 32-bit unsigned type used to hold a decoded code point.
using UChar32 = uint32_t;
      49             : 
      50             : inline int inlineUTF8SequenceLengthNonASCII(char b0) {
      51           0 :   if ((b0 & 0xC0) != 0xC0) return 0;
      52           0 :   if ((b0 & 0xE0) == 0xC0) return 2;
      53           0 :   if ((b0 & 0xF0) == 0xE0) return 3;
      54           0 :   if ((b0 & 0xF8) == 0xF0) return 4;
      55             :   return 0;
      56             : }
      57             : 
      58           0 : inline int inlineUTF8SequenceLength(char b0) {
      59           0 :   return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
      60             : }
      61             : 
// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
// into the first byte, depending on how many bytes follow.  There are
// as many entries in this table as there are UTF-8 sequence types.
// (I.e., one byte sequence, two byte... etc.). Remember that sequences
// for *legal* UTF-8 will be 4 or fewer bytes total.
// The table is indexed by the total number of bytes in the sequence
// (index 0 is unused padding so that the byte count can be used directly).
static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0,
                                               0xF0, 0xF8, 0xFC};
      69             : 
// Status returned by convertUTF16ToUTF8 and convertUTF8ToUTF16.
typedef enum {
  conversionOK,     // conversion successful
  sourceExhausted,  // partial character in source, but hit end
  targetExhausted,  // insuff. room in target for conversion
  sourceIllegal     // source sequence is illegal/malformed
} ConversionResult;
      76             : 
// Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 and
// writes the bytes into [*targetStart, targetEnd).
//
// On return *sourceStart points at the first code unit that was not consumed
// and *targetStart points one past the last byte written, regardless of the
// result.
//
// Returns:
//   conversionOK     - the whole source range was converted.
//   sourceExhausted  - the source ends with a high surrogate; the pointer is
//                      left on that surrogate.
//   targetExhausted  - the next character does not fit into the target; the
//                      source pointer is left on that character.
//   sourceIllegal    - only when |strict|: an unpaired surrogate was found;
//                      the source pointer is left on it.
// When |strict| is false, an unpaired surrogate that is not the last code
// unit is encoded as-is as a three-byte sequence.
ConversionResult convertUTF16ToUTF8(const UChar** sourceStart,
                                    const UChar* sourceEnd, char** targetStart,
                                    char* targetEnd, bool strict) {
  ConversionResult result = conversionOK;
  const UChar* source = *sourceStart;
  char* target = *targetStart;
  while (source < sourceEnd) {
    UChar32 ch;
    uint32_t bytesToWrite = 0;
    // (ch | byteMark) & byteMask yields a continuation byte of the form
    // 10xxxxxx from the low six bits of ch.
    const UChar32 byteMask = 0xBF;
    const UChar32 byteMark = 0x80;
    const UChar* oldSource =
        source;  // In case we have to back up because of target overflow.
    ch = static_cast<uint16_t>(*source++);
    // If we have a surrogate pair, convert to UChar32 first.
    if (ch >= 0xD800 && ch <= 0xDBFF) {
      // If the 16 bits following the high surrogate are in the source buffer...
      if (source < sourceEnd) {
        UChar32 ch2 = static_cast<uint16_t>(*source);
        // If it's a low surrogate, convert to UChar32.
        if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
          ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
          ++source;
        } else if (strict) {  // it's an unpaired high surrogate
          --source;           // return to the illegal value itself
          result = sourceIllegal;
          break;
        }
      } else {     // We don't have the 16 bits following the high surrogate.
        --source;  // return to the high surrogate
        result = sourceExhausted;
        break;
      }
    } else if (strict) {
      // UTF-16 surrogate values are illegal in UTF-32
      if (ch >= 0xDC00 && ch <= 0xDFFF) {
        --source;  // return to the illegal value itself
        result = sourceIllegal;
        break;
      }
    }
    // Figure out how many bytes the result will require
    if (ch < static_cast<UChar32>(0x80)) {
      bytesToWrite = 1;
    } else if (ch < static_cast<UChar32>(0x800)) {
      bytesToWrite = 2;
    } else if (ch < static_cast<UChar32>(0x10000)) {
      bytesToWrite = 3;
    } else if (ch < static_cast<UChar32>(0x110000)) {
      bytesToWrite = 4;
    } else {
      // Beyond the Unicode range: emit U+FFFD instead.
      bytesToWrite = 3;
      ch = replacementCharacter;
    }

    // Advance to the end of the sequence first; the bytes are then written
    // back to front by the switch below.
    target += bytesToWrite;
    if (target > targetEnd) {
      source = oldSource;  // Back up source pointer!
      target -= bytesToWrite;
      result = targetExhausted;
      break;
    }
    switch (bytesToWrite) {
      case 4:
        *--target = static_cast<char>((ch | byteMark) & byteMask);
        ch >>= 6;
        V8_FALLTHROUGH;
      case 3:
        *--target = static_cast<char>((ch | byteMark) & byteMask);
        ch >>= 6;
        V8_FALLTHROUGH;
      case 2:
        *--target = static_cast<char>((ch | byteMark) & byteMask);
        ch >>= 6;
        V8_FALLTHROUGH;
      case 1:
        // Lead byte: remaining high bits plus the length marker.
        *--target = static_cast<char>(ch | firstByteMark[bytesToWrite]);
    }
    // The switch left target at the start of the sequence; skip past it.
    target += bytesToWrite;
  }
  *sourceStart = source;
  *targetStart = target;
  return result;
}
     161             : 
/**
 * Is this code point a BMP code point (U+0000..U+ffff)?
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.8
 */
#define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF)

/**
 * Is this code point a supplementary code point (U+010000..U+10FFFF)?
 * The unsigned subtraction wraps for values below 0x010000, so a single
 * comparison covers both ends of the range.
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.8
 */
#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF)

/**
 * Is this code point a surrogate (U+d800..U+dfff)?
 * @param c 32-bit code point
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800)

/**
 * Get the lead surrogate (0xD800..0xDBFF) for a
 * supplementary code point (0x010000..0x10FFFF).
 * 0xD7C0 is 0xD800 - (0x10000 >> 10), which folds the subtraction of
 * 0x10000 into the surrogate base.
 * @param supplementary 32-bit code point (U+010000..U+10FFFF)
 * @return lead surrogate (U+D800..U+DBFF) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0)

/**
 * Get the trail surrogate (0xDC00..0xDFFF) for a
 * supplementary code point (0x010000..0x10FFFF).
 * @param supplementary 32-bit code point (U+010000..U+10FFFF)
 * @return trail surrogate (U+DC00..U+DFFF) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00)
     203             : 
// This must be called with the length pre-determined by the first byte.
// If presented with a length > 4, this returns false.  The Unicode
// definition of UTF-8 goes up to 4-byte sequences.
// A length of 0 (or any other value outside 1..4) also returns false via the
// default case. The bytes are checked from the last one back to the first.
static bool isLegalUTF8(const unsigned char* source, int length) {
  unsigned char a;
  const unsigned char* srcptr = source + length;
  switch (length) {
    default:
      return false;
    // Everything else falls through when "true"...
    case 4:
      // Fourth byte must be a continuation byte (0x80..0xBF).
      if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
      V8_FALLTHROUGH;
    case 3:
      // Third byte must be a continuation byte (0x80..0xBF).
      if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
      V8_FALLTHROUGH;
    case 2:
      // Second byte: the upper bound is checked here, the lower bound
      // depends on the lead byte and is checked in the inner switch.
      if ((a = (*--srcptr)) > 0xBF) return false;

      // no fall-through in this inner switch
      switch (*source) {
        case 0xE0:
          // E0 80..9F would be an overlong form of a shorter sequence.
          if (a < 0xA0) return false;
          break;
        case 0xED:
          // ED A0..BF would encode the surrogate range U+D800..U+DFFF.
          if (a > 0x9F) return false;
          break;
        case 0xF0:
          // F0 80..8F would be an overlong form of a shorter sequence.
          if (a < 0x90) return false;
          break;
        case 0xF4:
          // F4 90..BF would encode values above U+10FFFF.
          if (a > 0x8F) return false;
          break;
        default:
          if (a < 0x80) return false;
      }
      V8_FALLTHROUGH;

    case 1:
      // Lead byte may not be a continuation byte (0x80..0xBF) nor one of the
      // overlong two-byte leads 0xC0 / 0xC1.
      if (*source >= 0x80 && *source < 0xC2) return false;
  }
  // Lead bytes above 0xF4 would encode values above U+10FFFF.
  if (*source > 0xF4) return false;
  return true;
}
     248             : 
// Magic values subtracted from a buffer value during UTF8 conversion.
// This table contains as many values as there might be trailing bytes
// in a UTF-8 sequence.
// Indexed by (sequence length - 1). Each entry equals the sum of the marker
// bits (lead-byte prefix and the 0x80 of every continuation byte) that
// readUTF8Sequence accumulates along with the payload bits; e.g. for two
// bytes: (0xC0 << 6) + 0x80 == 0x3080.
static const UChar32 offsetsFromUTF8[6] = {0x00000000UL,
                                           0x00003080UL,
                                           0x000E2080UL,
                                           0x03C82080UL,
                                           static_cast<UChar32>(0xFA082080UL),
                                           static_cast<UChar32>(0x82082080UL)};
     258             : 
// Decodes one UTF-8 sequence of |length| bytes starting at |sequence| and
// returns the resulting code point. |sequence| is advanced past the bytes
// that were read.
//
// No validation is performed here. |length| is used to index
// offsetsFromUTF8 as (length - 1), so it must be in 1..6; the caller in this
// file only passes lengths already accepted by isLegalUTF8.
static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) {
  UChar32 character = 0;

  // The cases all fall through.
  // Each step adds the raw byte (marker bits included) and shifts by the six
  // payload bits of the next byte; the marker bits are removed at the end.
  switch (length) {
    case 6:
      character += static_cast<unsigned char>(*sequence++);
      character <<= 6;
      V8_FALLTHROUGH;
    case 5:
      character += static_cast<unsigned char>(*sequence++);
      character <<= 6;
      V8_FALLTHROUGH;
    case 4:
      character += static_cast<unsigned char>(*sequence++);
      character <<= 6;
      V8_FALLTHROUGH;
    case 3:
      character += static_cast<unsigned char>(*sequence++);
      character <<= 6;
      V8_FALLTHROUGH;
    case 2:
      character += static_cast<unsigned char>(*sequence++);
      character <<= 6;
      V8_FALLTHROUGH;
    case 1:
      character += static_cast<unsigned char>(*sequence++);
  }

  // Strip the accumulated lead/continuation marker bits.
  return character - offsetsFromUTF8[length - 1];
}
     290             : 
// Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 and writes
// the code units into [*targetStart, targetEnd).
//
// On return *sourceStart points at the first byte that was not consumed and
// *targetStart points one past the last code unit written, regardless of the
// result. If |sourceAllASCII| is non-null it is set to true when none of the
// written code units has a bit above 0x7F set.
//
// Returns:
//   conversionOK     - the whole source range was converted.
//   sourceExhausted  - the source ends in the middle of a sequence.
//   targetExhausted  - the next character does not fit into the target.
//   sourceIllegal    - the next sequence is malformed (always checked), or,
//                      when |strict|, decodes to a surrogate or to a value
//                      that is neither BMP nor supplementary.
// When |strict| is false those latter values are replaced with U+FFFD.
ConversionResult convertUTF8ToUTF16(const char** sourceStart,
                                    const char* sourceEnd, UChar** targetStart,
                                    UChar* targetEnd, bool* sourceAllASCII,
                                    bool strict) {
  ConversionResult result = conversionOK;
  const char* source = *sourceStart;
  UChar* target = *targetStart;
  // OR of every code unit written so far; used for the all-ASCII report.
  UChar orAllData = 0;
  while (source < sourceEnd) {
    // 0 for an invalid lead byte; isLegalUTF8 rejects that length below.
    int utf8SequenceLength = inlineUTF8SequenceLength(*source);
    if (sourceEnd - source < utf8SequenceLength) {
      result = sourceExhausted;
      break;
    }
    // Do this check whether lenient or strict
    if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source),
                     utf8SequenceLength)) {
      result = sourceIllegal;
      break;
    }

    // Advances source past the sequence.
    UChar32 character = readUTF8Sequence(source, utf8SequenceLength);

    if (target >= targetEnd) {
      source -= utf8SequenceLength;  // Back up source pointer!
      result = targetExhausted;
      break;
    }

    if (U_IS_BMP(character)) {
      // UTF-16 surrogate values are illegal in UTF-32
      if (U_IS_SURROGATE(character)) {
        if (strict) {
          source -= utf8SequenceLength;  // return to the illegal value itself
          result = sourceIllegal;
          break;
        }
        *target++ = replacementCharacter;
        orAllData |= replacementCharacter;
      } else {
        *target++ = static_cast<UChar>(character);  // normal case
        orAllData |= character;
      }
    } else if (U_IS_SUPPLEMENTARY(character)) {
      // target is a character in range 0xFFFF - 0x10FFFF
      // Two code units are needed for the surrogate pair.
      if (target + 1 >= targetEnd) {
        source -= utf8SequenceLength;  // Back up source pointer!
        result = targetExhausted;
        break;
      }
      *target++ = U16_LEAD(character);
      *target++ = U16_TRAIL(character);
      orAllData = 0xFFFF;
    } else {
      if (strict) {
        source -= utf8SequenceLength;  // return to the start
        result = sourceIllegal;
        break;  // Bail out; shouldn't continue
      } else {
        *target++ = replacementCharacter;
        orAllData |= replacementCharacter;
      }
    }
  }
  *sourceStart = source;
  *targetStart = target;

  if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F);

  return result;
}
     362             : 
     363             : // Helper to write a three-byte UTF-8 code point to the buffer, caller must
     364             : // check room is available.
     365           0 : static inline void putUTF8Triple(char*& buffer, UChar ch) {
     366           0 :   *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
     367           0 :   *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
     368           0 :   *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
     369           0 : }
     370             : 
     371             : }  // namespace
     372             : 
// Default constructor; compiler-generated.
String16::String16() = default;

// Copy constructor; compiler-generated member-wise copy.
String16::String16(const String16& other) = default;
     376             : 
// Move constructor: takes over |other|'s character storage and copies its
// hash_code. |other|.hash_code is left untouched.
String16::String16(String16&& other) V8_NOEXCEPT
    : m_impl(std::move(other.m_impl)),
      hash_code(other.hash_code) {}
     380             : 
// Constructs a string from |size| UTF-16 code units starting at |characters|.
String16::String16(const UChar* characters, size_t size)
    : m_impl(characters, size) {}
     383             : 
// Constructs a string from |characters| without an explicit length; the
// length is determined by m_impl's constructor (presumably up to the first
// zero code unit - confirm against the m_impl type).
String16::String16(const UChar* characters) : m_impl(characters) {}
     385             : 
// Constructs a string from a NUL-terminated char string; the length is taken
// with std::strlen, so |characters| must not be null.
String16::String16(const char* characters)
    : String16(characters, std::strlen(characters)) {}
     388             : 
     389   208835072 : String16::String16(const char* characters, size_t size) {
     390   104417536 :   m_impl.resize(size);
     391   673173442 :   for (size_t i = 0; i < size; ++i) m_impl[i] = characters[i];
     392   104417536 : }
     393             : 
// Constructs a string holding a copy of |impl|.
String16::String16(const std::basic_string<UChar>& impl) : m_impl(impl) {}
     395             : 
// Copy assignment; compiler-generated member-wise copy.
String16& String16::operator=(const String16& other) = default;
     397             : 
// Move assignment: takes over |other|'s character storage and copies its
// hash_code. |other|.hash_code is left untouched.
String16& String16::operator=(String16&& other) V8_NOEXCEPT {
  m_impl = std::move(other.m_impl);
  hash_code = other.hash_code;
  return *this;
}
     403             : 
     404             : // static
     405     8374125 : String16 String16::fromInteger(int number) {
     406             :   char arr[50];
     407             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     408     8374125 :   return String16(IntToCString(number, buffer));
     409             : }
     410             : 
     411             : // static
     412       63503 : String16 String16::fromInteger(size_t number) {
     413             :   const size_t kBufferSize = 50;
     414             :   char buffer[kBufferSize];
     415             : #if !defined(_WIN32) && !defined(_WIN64)
     416       63503 :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     417             : #else
     418             :   v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     419             : #endif
     420       63503 :   return String16(buffer);
     421             : }
     422             : 
     423             : // static
     424      297186 : String16 String16::fromDouble(double number) {
     425             :   char arr[50];
     426             :   v8::internal::Vector<char> buffer(arr, arraysize(arr));
     427      297186 :   return String16(DoubleToCString(number, buffer));
     428             : }
     429             : 
     430             : // static
     431           5 : String16 String16::fromDouble(double number, int precision) {
     432             :   std::unique_ptr<char[]> str(
     433           5 :       v8::internal::DoubleToPrecisionCString(number, precision));
     434          10 :   return String16(str.get());
     435             : }
     436             : 
     437         130 : int64_t String16::toInteger64(bool* ok) const {
     438      151403 :   return charactersToInteger(characters16(), length(), ok);
     439             : }
     440             : 
     441      151273 : int String16::toInteger(bool* ok) const {
     442             :   int64_t result = toInteger64(ok);
     443      151273 :   if (ok && *ok) {
     444      147654 :     *ok = result <= std::numeric_limits<int>::max() &&
     445      147654 :           result >= std::numeric_limits<int>::min();
     446             :   }
     447      151273 :   return static_cast<int>(result);
     448             : }
     449             : 
     450         260 : String16 String16::stripWhiteSpace() const {
     451         260 :   if (!length()) return String16();
     452             : 
     453             :   size_t start = 0;
     454         255 :   size_t end = length() - 1;
     455             : 
     456             :   // skip white space from start
     457         915 :   while (start <= end && isSpaceOrNewLine(characters16()[start])) ++start;
     458             : 
     459             :   // only white space
     460         255 :   if (start > end) return String16();
     461             : 
     462             :   // skip white space from end
     463         525 :   while (end && isSpaceOrNewLine(characters16()[end])) --end;
     464             : 
     465         250 :   if (!start && end == length() - 1) return *this;
     466         110 :   return String16(characters16() + start, end + 1 - start);
     467             : }
     468             : 
// Default constructor; compiler-generated.
String16Builder::String16Builder() = default;
     470             : 
     471    56643430 : void String16Builder::append(const String16& s) {
     472             :   m_buffer.insert(m_buffer.end(), s.characters16(),
     473   113286860 :                   s.characters16() + s.length());
     474    56643430 : }
     475             : 
// Appends the single code unit |c| to the buffer.
void String16Builder::append(UChar c) { m_buffer.push_back(c); }
     477             : 
     478    99315548 : void String16Builder::append(char c) {
     479    99315548 :   UChar u = c;
     480    99315548 :   m_buffer.push_back(u);
     481    99315548 : }
     482             : 
     483           0 : void String16Builder::append(const UChar* characters, size_t length) {
     484           0 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     485           0 : }
     486             : 
     487    10175232 : void String16Builder::append(const char* characters, size_t length) {
     488    20350464 :   m_buffer.insert(m_buffer.end(), characters, characters + length);
     489    10175232 : }
     490             : 
     491        6979 : void String16Builder::appendNumber(int number) {
     492             :   constexpr int kBufferSize = 11;
     493             :   char buffer[kBufferSize];
     494        6979 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%d", number);
     495             :   DCHECK_LE(0, chars);
     496       13958 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     497        6979 : }
     498             : 
     499         235 : void String16Builder::appendNumber(size_t number) {
     500             :   constexpr int kBufferSize = 20;
     501             :   char buffer[kBufferSize];
     502             : #if !defined(_WIN32) && !defined(_WIN64)
     503         235 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%zu", number);
     504             : #else
     505             :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%Iu", number);
     506             : #endif
     507             :   DCHECK_LE(0, chars);
     508         470 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     509         235 : }
     510             : 
     511           0 : void String16Builder::appendUnsignedAsHex(uint64_t number) {
     512             :   constexpr int kBufferSize = 17;
     513             :   char buffer[kBufferSize];
     514             :   int chars =
     515           0 :       v8::base::OS::SNPrintF(buffer, kBufferSize, "%016" PRIx64, number);
     516             :   DCHECK_LE(0, chars);
     517           0 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     518           0 : }
     519             : 
     520      301732 : void String16Builder::appendUnsignedAsHex(uint32_t number) {
     521             :   constexpr int kBufferSize = 9;
     522             :   char buffer[kBufferSize];
     523      301732 :   int chars = v8::base::OS::SNPrintF(buffer, kBufferSize, "%08" PRIx32, number);
     524             :   DCHECK_LE(0, chars);
     525      603464 :   m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
     526      301732 : }
     527             : 
     528     4791651 : String16 String16Builder::toString() {
     529     9583302 :   return String16(m_buffer.data(), m_buffer.size());
     530             : }
     531             : 
// Forwards |capacity| to m_buffer.reserve() so that later appends up to that
// many code units need not reallocate.
void String16Builder::reserveCapacity(size_t capacity) {
  m_buffer.reserve(capacity);
}
     535             : 
     536           0 : String16 String16::fromUTF8(const char* stringStart, size_t length) {
     537           0 :   if (!stringStart || !length) return String16();
     538             : 
     539           0 :   std::vector<UChar> buffer(length);
     540             :   UChar* bufferStart = buffer.data();
     541             : 
     542           0 :   UChar* bufferCurrent = bufferStart;
     543           0 :   const char* stringCurrent = stringStart;
     544           0 :   if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
     545           0 :                          bufferCurrent + buffer.size(), nullptr,
     546           0 :                          true) != conversionOK)
     547             :     return String16();
     548             : 
     549           0 :   size_t utf16Length = bufferCurrent - bufferStart;
     550             :   return String16(bufferStart, utf16Length);
     551             : }
     552             : 
     553           0 : std::string String16::utf8() const {
     554             :   size_t length = this->length();
     555             : 
     556           0 :   if (!length) return std::string("");
     557             : 
     558             :   // Allocate a buffer big enough to hold all the characters
     559             :   // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
     560             :   // Optimization ideas, if we find this function is hot:
     561             :   //  * We could speculatively create a CStringBuffer to contain 'length'
     562             :   //    characters, and resize if necessary (i.e. if the buffer contains
     563             :   //    non-ascii characters). (Alternatively, scan the buffer first for
     564             :   //    ascii characters, so we know this will be sufficient).
     565             :   //  * We could allocate a CStringBuffer with an appropriate size to
     566             :   //    have a good chance of being able to write the string into the
     567             :   //    buffer without reallocing (say, 1.5 x length).
     568           0 :   if (length > std::numeric_limits<unsigned>::max() / 3) return std::string();
     569           0 :   std::vector<char> bufferVector(length * 3);
     570           0 :   char* buffer = bufferVector.data();
     571           0 :   const UChar* characters = m_impl.data();
     572             : 
     573             :   ConversionResult result =
     574             :       convertUTF16ToUTF8(&characters, characters + length, &buffer,
     575           0 :                          buffer + bufferVector.size(), false);
     576             :   DCHECK(
     577             :       result !=
     578             :       targetExhausted);  // (length * 3) should be sufficient for any conversion
     579             : 
     580             :   // Only produced from strict conversion.
     581             :   DCHECK(result != sourceIllegal);
     582             : 
     583             :   // Check for an unconverted high surrogate.
     584           0 :   if (result == sourceExhausted) {
     585             :     // This should be one unpaired high surrogate. Treat it the same
     586             :     // was as an unpaired high surrogate would have been handled in
     587             :     // the middle of a string with non-strict conversion - which is
     588             :     // to say, simply encode it to UTF-8.
     589             :     DCHECK((characters + 1) == (m_impl.data() + length));
     590             :     DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF));
     591             :     // There should be room left, since one UChar hasn't been
     592             :     // converted.
     593             :     DCHECK((buffer + 3) <= (buffer + bufferVector.size()));
     594           0 :     putUTF8Triple(buffer, *characters);
     595             :   }
     596             : 
     597           0 :   return std::string(bufferVector.data(), buffer - bufferVector.data());
     598             : }
     599             : 
     600      183867 : }  // namespace v8_inspector

Generated by: LCOV version 1.10