LCOV - code coverage report
Current view: top level - src - uri.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 181 189 95.8 %
Date: 2019-04-17 Functions: 21 24 87.5 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/uri.h"
       6             : 
       7             : #include <vector>
       8             : 
       9             : #include "src/char-predicates-inl.h"
      10             : #include "src/isolate-inl.h"
      11             : #include "src/string-search.h"
      12             : #include "src/unicode-inl.h"
      13             : 
      14             : namespace v8 {
      15             : namespace internal {
      16             : 
      17             : namespace {  // anonymous namespace for DecodeURI helper functions
      18             : bool IsReservedPredicate(uc16 c) {
      19             :   switch (c) {
      20             :     case '#':
      21             :     case '$':
      22             :     case '&':
      23             :     case '+':
      24             :     case ',':
      25             :     case '/':
      26             :     case ':':
      27             :     case ';':
      28             :     case '=':
      29             :     case '?':
      30             :     case '@':
      31             :       return true;
      32             :     default:
      33             :       return false;
      34             :   }
      35             : }
      36             : 
      37             : bool IsReplacementCharacter(const uint8_t* octets, int length) {
      38             :   // The replacement character is at codepoint U+FFFD in the Unicode Specials
      39             :   // table. Its UTF-8 encoding is 0xEF 0xBF 0xBD.
      40         252 :   if (length != 3 || octets[0] != 0xEF || octets[1] != 0xBF ||
      41          18 :       octets[2] != 0xBD) {
      42             :     return false;
      43             :   }
      44             :   return true;
      45             : }
      46             : 
      47        5497 : bool DecodeOctets(const uint8_t* octets, int length,
      48             :                   std::vector<uc16>* buffer) {
      49        5497 :   size_t cursor = 0;
      50       10994 :   uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
      51        5731 :   if (value == unibrow::Utf8::kBadChar &&
      52             :       !IsReplacementCharacter(octets, length)) {
      53             :     return false;
      54             :   }
      55             : 
      56        5281 :   if (value <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
      57        9906 :     buffer->push_back(value);
      58             :   } else {
      59         656 :     buffer->push_back(unibrow::Utf16::LeadSurrogate(value));
      60         656 :     buffer->push_back(unibrow::Utf16::TrailSurrogate(value));
      61             :   }
      62             :   return true;
      63             : }
      64             : 
      65     7329961 : int TwoDigitHex(uc16 character1, uc16 character2) {
      66     7329961 :   if (character1 > 'f') return -1;
      67     7329583 :   int high = HexValue(character1);
      68     7329583 :   if (high == -1) return -1;
      69     7329403 :   if (character2 > 'f') return -1;
      70     7329349 :   int low = HexValue(character2);
      71     7329349 :   if (low == -1) return -1;
      72     7329187 :   return (high << 4) + low;
      73             : }
      74             : 
      75             : template <typename T>
      76     2949259 : void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,
      77             :                  bool is_uri, std::vector<T>* buffer) {
      78     2949259 :   if (is_uri && IsReservedPredicate(decoded)) {
      79           0 :     buffer->push_back('%');
      80           0 :     uc16 first = uri_content->Get(index + 1);
      81           0 :     uc16 second = uri_content->Get(index + 2);
      82             :     DCHECK_GT(std::numeric_limits<T>::max(), first);
      83             :     DCHECK_GT(std::numeric_limits<T>::max(), second);
      84             : 
      85           0 :     buffer->push_back(first);
      86           0 :     buffer->push_back(second);
      87             :   } else {
      88     5898509 :     buffer->push_back(decoded);
      89             :   }
      90     2949259 : }
      91             : 
      92        5459 : bool IntoTwoByte(int index, bool is_uri, int uri_length,
      93             :                  String::FlatContent* uri_content, std::vector<uc16>* buffer) {
      94       16039 :   for (int k = index; k < uri_length; k++) {
      95        5584 :     uc16 code = uri_content->Get(k);
      96        5584 :     if (code == '%') {
      97             :       int two_digits;
      98       11168 :       if (k + 2 >= uri_length ||
      99       11168 :           (two_digits = TwoDigitHex(uri_content->Get(k + 1),
     100             :                                     uri_content->Get(k + 2))) < 0) {
     101         294 :         return false;
     102             :       }
     103             :       k += 2;
     104        5584 :       uc16 decoded = static_cast<uc16>(two_digits);
     105        5584 :       if (decoded > unibrow::Utf8::kMaxOneByteChar) {
     106             :         uint8_t octets[unibrow::Utf8::kMaxEncodedSize];
     107        5575 :         octets[0] = decoded;
     108             : 
     109             :         int number_of_continuation_bytes = 0;
     110       27611 :         while ((decoded << ++number_of_continuation_bytes) & 0x80) {
     111       11096 :           if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) {
     112         294 :             return false;
     113             :           }
     114       33072 :           if (uri_content->Get(++k) != '%' ||
     115       22036 :               (two_digits = TwoDigitHex(uri_content->Get(k + 1),
     116             :                                         uri_content->Get(k + 2))) < 0) {
     117             :             return false;
     118             :           }
     119             :           k += 2;
     120             :           uc16 continuation_byte = static_cast<uc16>(two_digits);
     121       11018 :           octets[number_of_continuation_bytes] = continuation_byte;
     122             :         }
     123             : 
     124        5497 :         if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {
     125             :           return false;
     126             :         }
     127             :       } else {
     128           9 :         AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);
     129             :       }
     130             :     } else {
     131           0 :       buffer->push_back(code);
     132             :     }
     133             :   }
     134             :   return true;
     135             : }
     136             : 
     137        5670 : bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
     138             :                        std::vector<uint8_t>* one_byte_buffer,
     139             :                        std::vector<uc16>* two_byte_buffer) {
     140             :   DisallowHeapAllocation no_gc;
     141        5670 :   String::FlatContent uri_content = uri->GetFlatContent(no_gc);
     142             : 
     143             :   int uri_length = uri->length();
     144    40115402 :   for (int k = 0; k < uri_length; k++) {
     145             :     uc16 code = uri_content.Get(k);
     146    20060343 :     if (code == '%') {
     147             :       int two_digits;
     148     5909436 :       if (k + 2 >= uri_length ||
     149     5909418 :           (two_digits = TwoDigitHex(uri_content.Get(k + 1),
     150             :                                     uri_content.Get(k + 2))) < 0) {
     151             :         return false;
     152             :       }
     153             : 
     154     2954709 :       uc16 decoded = static_cast<uc16>(two_digits);
     155     2954709 :       if (decoded > unibrow::Utf8::kMaxOneByteChar) {
     156        5459 :         return IntoTwoByte(k, is_uri, uri_length, &uri_content,
     157        5459 :                            two_byte_buffer);
     158             :       }
     159             : 
     160     2949250 :       AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
     161             :       k += 2;
     162             :     } else {
     163    17105616 :       if (code > unibrow::Utf8::kMaxOneByteChar) {
     164           0 :         return IntoTwoByte(k, is_uri, uri_length, &uri_content,
     165           0 :                            two_byte_buffer);
     166             :       }
     167    34211232 :       one_byte_buffer->push_back(code);
     168             :     }
     169             :   }
     170             :   return true;
     171             : }
     172             : 
     173             : }  // anonymous namespace
     174             : 
     175        5670 : MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,
     176             :                                 bool is_uri) {
     177        5670 :   uri = String::Flatten(isolate, uri);
     178             :   std::vector<uint8_t> one_byte_buffer;
     179             :   std::vector<uc16> two_byte_buffer;
     180             : 
     181        5670 :   if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
     182         312 :     THROW_NEW_ERROR(isolate, NewURIError(), String);
     183             :   }
     184             : 
     185        5358 :   if (two_byte_buffer.empty()) {
     186             :     return isolate->factory()->NewStringFromOneByte(Vector<const uint8_t>(
     187         386 :         one_byte_buffer.data(), static_cast<int>(one_byte_buffer.size())));
     188             :   }
     189             : 
     190             :   Handle<SeqTwoByteString> result;
     191             :   int result_length =
     192       10330 :       static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size());
     193       10330 :   ASSIGN_RETURN_ON_EXCEPTION(
     194             :       isolate, result, isolate->factory()->NewRawTwoByteString(result_length),
     195             :       String);
     196             : 
     197             :   DisallowHeapAllocation no_gc;
     198             :   CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
     199             :             one_byte_buffer.size());
     200        5165 :   CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
     201             :             two_byte_buffer.data(), two_byte_buffer.size());
     202             : 
     203        5165 :   return result;
     204             : }
     205             : 
     206             : namespace {  // anonymous namespace for EncodeURI helper functions
     207    20903733 : bool IsUnescapePredicateInUriComponent(uc16 c) {
     208    20903733 :   if (IsAlphaNumeric(c)) {
     209             :     return true;
     210             :   }
     211             : 
     212     4760936 :   switch (c) {
     213             :     case '!':
     214             :     case '\'':
     215             :     case '(':
     216             :     case ')':
     217             :     case '*':
     218             :     case '-':
     219             :     case '.':
     220             :     case '_':
     221             :     case '~':
     222             :       return true;
     223             :     default:
     224     2956080 :       return false;
     225             :   }
     226             : }
     227             : 
     228             : bool IsUriSeparator(uc16 c) {
     229             :   switch (c) {
     230             :     case '#':
     231             :     case ':':
     232             :     case ';':
     233             :     case '/':
     234             :     case '?':
     235             :     case '$':
     236             :     case '&':
     237             :     case '+':
     238             :     case ',':
     239             :     case '@':
     240             :     case '=':
     241             :       return true;
     242             :     default:
     243             :       return false;
     244             :   }
     245             : }
     246             : 
     247     2968661 : void AddEncodedOctetToBuffer(uint8_t octet, std::vector<uint8_t>* buffer) {
     248     5937322 :   buffer->push_back('%');
     249     8905983 :   buffer->push_back(HexCharOfValue(octet >> 4));
     250     8905983 :   buffer->push_back(HexCharOfValue(octet & 0x0F));
     251     2968661 : }
     252             : 
     253     2955981 : void EncodeSingle(uc16 c, std::vector<uint8_t>* buffer) {
     254     2955981 :   char s[4] = {};
     255             :   int number_of_bytes;
     256             :   number_of_bytes =
     257     2955981 :       unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
     258     8890791 :   for (int k = 0; k < number_of_bytes; k++) {
     259     2967405 :     AddEncodedOctetToBuffer(s[k], buffer);
     260             :   }
     261     2955981 : }
     262             : 
     263         314 : void EncodePair(uc16 cc1, uc16 cc2, std::vector<uint8_t>* buffer) {
     264         314 :   char s[4] = {};
     265             :   int number_of_bytes =
     266         314 :       unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
     267         314 :                             unibrow::Utf16::kNoPreviousCharacter, false);
     268        2826 :   for (int k = 0; k < number_of_bytes; k++) {
     269        1256 :     AddEncodedOctetToBuffer(s[k], buffer);
     270             :   }
     271         314 : }
     272             : 
     273             : }  // anonymous namespace
     274             : 
     275      148018 : MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
     276             :                                 bool is_uri) {
     277      148018 :   uri = String::Flatten(isolate, uri);
     278             :   int uri_length = uri->length();
     279             :   std::vector<uint8_t> buffer;
     280      148022 :   buffer.reserve(uri_length);
     281             : 
     282             :   {
     283             :     DisallowHeapAllocation no_gc;
     284      148033 :     String::FlatContent uri_content = uri->GetFlatContent(no_gc);
     285             : 
     286    41956126 :     for (int k = 0; k < uri_length; k++) {
     287             :       uc16 cc1 = uri_content.Get(k);
     288    20908977 :       if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
     289        5120 :         k++;
     290        5120 :         if (k < uri_length) {
     291             :           uc16 cc2 = uri->Get(k);
     292        5120 :           if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
     293         314 :             EncodePair(cc1, cc2, &buffer);
     294         314 :             continue;
     295             :           }
     296             :         }
     297    20903857 :       } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {
     298    41807468 :         if (IsUnescapePredicateInUriComponent(cc1) ||
     299     2951874 :             (is_uri && IsUriSeparator(cc1))) {
     300    35895516 :           buffer.push_back(cc1);
     301             :         } else {
     302     2955981 :           EncodeSingle(cc1, &buffer);
     303             :         }
     304             :         continue;
     305             :       }
     306             : 
     307             :       AllowHeapAllocation allocate_error_and_return;
     308        4930 :       THROW_NEW_ERROR(isolate, NewURIError(), String);
     309             :     }
     310             :   }
     311             : 
     312      143093 :   return isolate->factory()->NewStringFromOneByte(VectorOf(buffer));
     313             : }
     314             : 
     315             : namespace {  // Anonymous namespace for Escape and Unescape
     316             : 
     317             : template <typename Char>
     318    12884548 : int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {
     319    25769096 :   uint16_t character = vector[i];
     320             :   int32_t hi = 0;
     321             :   int32_t lo = 0;
     322    17579480 :   if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
     323     1336878 :       (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 &&
     324     1336446 :       (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) {
     325      445338 :     *step = 6;
     326      445338 :     return (hi << 8) + lo;
     327    15906752 :   } else if (character == '%' && i <= length - 3 &&
     328    10402626 :              (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) {
     329     3467056 :     *step = 3;
     330             :     return lo;
     331             :   } else {
     332     8972154 :     *step = 1;
     333     8972154 :     return character;
     334             :   }
     335             : }
     336             : 
     337             : template <typename Char>
     338       55688 : MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,
     339             :                                  int start_index) {
     340             :   bool one_byte = true;
     341             :   int length = string->length();
     342             : 
     343             :   int unescaped_length = 0;
     344             :   {
     345             :     DisallowHeapAllocation no_allocation;
     346      111376 :     Vector<const Char> vector = string->GetCharVector<Char>(no_allocation);
     347    12940236 :     for (int i = start_index; i < length; unescaped_length++) {
     348             :       int step;
     349     6442274 :       if (UnescapeChar(vector, i, length, &step) >
     350             :           String::kMaxOneByteCharCode) {
     351             :         one_byte = false;
     352             :       }
     353     6442274 :       i += step;
     354             :     }
     355             :   }
     356             : 
     357             :   DCHECK(start_index < length);
     358             :   Handle<String> first_part =
     359       55688 :       isolate->factory()->NewProperSubString(string, 0, start_index);
     360             : 
     361             :   int dest_position = 0;
     362             :   Handle<String> second_part;
     363             :   DCHECK_LE(unescaped_length, String::kMaxLength);
     364       55688 :   if (one_byte) {
     365             :     Handle<SeqOneByteString> dest = isolate->factory()
     366             :                                         ->NewRawOneByteString(unescaped_length)
     367        2026 :                                         .ToHandleChecked();
     368             :     DisallowHeapAllocation no_allocation;
     369        2026 :     Vector<const Char> vector = string->GetCharVector<Char>(no_allocation);
     370       22167 :     for (int i = start_index; i < length; dest_position++) {
     371             :       int step;
     372             :       dest->SeqOneByteStringSet(dest_position,
     373       10577 :                                 UnescapeChar(vector, i, length, &step));
     374       10577 :       i += step;
     375             :     }
     376        1013 :     second_part = dest;
     377             :   } else {
     378             :     Handle<SeqTwoByteString> dest = isolate->factory()
     379             :                                         ->NewRawTwoByteString(unescaped_length)
     380      109350 :                                         .ToHandleChecked();
     381             :     DisallowHeapAllocation no_allocation;
     382      109350 :     Vector<const Char> vector = string->GetCharVector<Char>(no_allocation);
     383    12918069 :     for (int i = start_index; i < length; dest_position++) {
     384             :       int step;
     385             :       dest->SeqTwoByteStringSet(dest_position,
     386     6431697 :                                 UnescapeChar(vector, i, length, &step));
     387     6431697 :       i += step;
     388             :     }
     389       54675 :     second_part = dest;
     390             :   }
     391       55688 :   return isolate->factory()->NewConsString(first_part, second_part);
     392             : }
     393             : 
     394     6385781 : bool IsNotEscaped(uint16_t c) {
     395     6385781 :   if (IsAlphaNumeric(c)) {
     396             :     return true;
     397             :   }
     398             :   //  @*_+-./
     399             :   switch (c) {
     400             :     case '@':
     401             :     case '*':
     402             :     case '_':
     403             :     case '+':
     404             :     case '-':
     405             :     case '.':
     406             :     case '/':
     407             :       return true;
     408             :     default:
     409     1737092 :       return false;
     410             :   }
     411             : }
     412             : 
     413             : template <typename Char>
     414      192974 : static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
     415             :                                            Handle<String> source) {
     416             :   int index;
     417             :   {
     418             :     DisallowHeapAllocation no_allocation;
     419             :     StringSearch<uint8_t, Char> search(isolate, StaticCharVector("%"));
     420      385946 :     index = search.Search(source->GetCharVector<Char>(no_allocation), 0);
     421      192972 :     if (index < 0) return source;
     422             :   }
     423       55688 :   return UnescapeSlow<Char>(isolate, source, index);
     424             : }
     425             : 
     426             : template <typename Char>
     427       37287 : static MaybeHandle<String> EscapePrivate(Isolate* isolate,
     428             :                                          Handle<String> string) {
     429             :   DCHECK(string->IsFlat());
     430             :   int escaped_length = 0;
     431             :   int length = string->length();
     432             : 
     433             :   {
     434             :     DisallowHeapAllocation no_allocation;
     435       74574 :     Vector<const Char> vector = string->GetCharVector<Char>(no_allocation);
     436     6667679 :     for (int i = 0; i < length; i++) {
     437     6630392 :       uint16_t c = vector[i];
     438     3308679 :       if (c >= 256) {
     439      121473 :         escaped_length += 6;
     440     3193723 :       } else if (IsNotEscaped(c)) {
     441     2325177 :         escaped_length++;
     442             :       } else {
     443      868546 :         escaped_length += 3;
     444             :       }
     445             : 
     446             :       // We don't allow strings that are longer than a maximal length.
     447             :       DCHECK_LT(String::kMaxLength, 0x7FFFFFFF - 6);   // Cannot overflow.
     448     3315196 :       if (escaped_length > String::kMaxLength) break;  // Provoke exception.
     449             :     }
     450             :   }
     451             : 
     452             :   // No length change implies no change.  Return original string if no change.
     453       37287 :   if (escaped_length == length) return string;
     454             : 
     455             :   Handle<SeqOneByteString> dest;
     456       73170 :   ASSIGN_RETURN_ON_EXCEPTION(
     457             :       isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
     458             :       String);
     459             :   int dest_position = 0;
     460             : 
     461             :   {
     462             :     DisallowHeapAllocation no_allocation;
     463       73170 :     Vector<const Char> vector = string->GetCharVector<Char>(no_allocation);
     464     6663647 :     for (int i = 0; i < length; i++) {
     465     6627062 :       uint16_t c = vector[i];
     466     3308679 :       if (c >= 256) {
     467             :         dest->SeqOneByteStringSet(dest_position, '%');
     468      121473 :         dest->SeqOneByteStringSet(dest_position + 1, 'u');
     469      242946 :         dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));
     470      121473 :         dest->SeqOneByteStringSet(dest_position + 3,
     471      121473 :                                   HexCharOfValue((c >> 8) & 0xF));
     472      121473 :         dest->SeqOneByteStringSet(dest_position + 4,
     473      121473 :                                   HexCharOfValue((c >> 4) & 0xF));
     474      242946 :         dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xF));
     475      121473 :         dest_position += 6;
     476     3192058 :       } else if (IsNotEscaped(c)) {
     477             :         dest->SeqOneByteStringSet(dest_position, c);
     478     2323512 :         dest_position++;
     479             :       } else {
     480             :         dest->SeqOneByteStringSet(dest_position, '%');
     481     1737092 :         dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));
     482     1737092 :         dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xF));
     483      868546 :         dest_position += 3;
     484             :       }
     485             :     }
     486             :   }
     487             : 
     488       36585 :   return dest;
     489             : }
     490             : 
     491             : }  // Anonymous namespace
     492             : 
     493       37287 : MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {
     494             :   Handle<String> result;
     495       37287 :   string = String::Flatten(isolate, string);
     496       37287 :   return String::IsOneByteRepresentationUnderneath(*string)
     497             :              ? EscapePrivate<uint8_t>(isolate, string)
     498       37287 :              : EscapePrivate<uc16>(isolate, string);
     499             : }
     500             : 
     501      192969 : MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {
     502             :   Handle<String> result;
     503      192969 :   string = String::Flatten(isolate, string);
     504      192975 :   return String::IsOneByteRepresentationUnderneath(*string)
     505             :              ? UnescapePrivate<uint8_t>(isolate, string)
     506      192977 :              : UnescapePrivate<uc16>(isolate, string);
     507             : }
     508             : 
     509             : }  // namespace internal
     510      121996 : }  // namespace v8

Generated by: LCOV version 1.10